From 9cd7554d38058546114683d5f63b937255fbf81f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 23:21:27 +0200 Subject: [PATCH] deps!: BigQuery Storage and pyarrow are required dependencies (#776) * process: make BQ Storage and pyarrow required * Make pyarrow required in _pandas_helpers.py * Make pyarrow required in client.py * Make pyarrow required in table.py * Make pyarrow required in job/query.py * Make pyarrow required in DBAPI tests * Make pyarrow required in snippets tests * Make BQ storage required in client.py * Make BQ storage required in table.py * Make BQ storage required in DB API tests * Make BQ storage required in magics.py * Make BQ storage required in test__helpers.py * Make BQ storage required in test__pandas_helpers.py * Make BQ storage required in test_query_pandas.py * Make method signatures compatible again The annotations caused a mismatch * Remove checks for minimum BQ Storage version Since this is now a required dependency, there should not be any more pip quirks that used to allow installing BQ Storage as an extra, but without always respecting its minimum version pin. * Remove LegacyBigQueryStorageError Since it will be released in a major version bump, we can make this a breaking change, i.e. without deprecation. * Bump minimum pyarrow version to 3.0.0 * Remove unneeded pytest.importorskip for BQ Storage * Remove pyarrow version checks in pandas helpers tests * Conditionally skip pandas tests where needed * Remove unneeded conditional pyarrow version paths * Cover schema autodetect failed code path in test * fix bad merge Co-authored-by: Tim Swast --- docs/snippets.py | 4 - google/cloud/bigquery/__init__.py | 3 - google/cloud/bigquery/_helpers.py | 26 --- google/cloud/bigquery/_pandas_helpers.py | 122 ++++------ google/cloud/bigquery/client.py | 67 +----- google/cloud/bigquery/exceptions.py | 21 -- google/cloud/bigquery/job/query.py | 11 +- google/cloud/bigquery/magics/magics.py | 11 - google/cloud/bigquery/table.py | 52 +---- setup.py | 24 +- testing/constraints-3.6.txt | 2 +- tests/system/test_client.py | 38 +-- tests/system/test_pandas.py | 35 ++- tests/unit/job/test_query_pandas.py | 27 +-- tests/unit/test__helpers.py | 37 --- tests/unit/test__pandas_helpers.py | 145 ++---------- tests/unit/test_client.py | 281 +++++------------------ tests/unit/test_dbapi__helpers.py | 6 - tests/unit/test_dbapi_connection.py | 22 +- tests/unit/test_dbapi_cursor.py | 26 +-- tests/unit/test_magics.py | 79 +------ tests/unit/test_table.py | 155 +------------ 22 files changed, 196 insertions(+), 998 deletions(-) delete mode 100644 google/cloud/bigquery/exceptions.py diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..82e07901e 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -30,10 +30,6 @@ import pandas except (ImportError, AttributeError): pandas = None -try: - import pyarrow -except (ImportError, AttributeError): - pyarrow = None from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..59bb08ce5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -42,7 +42,6 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from 
google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -171,8 +170,6 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Custom exceptions - "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..3d83ddee9 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -28,8 +28,6 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -41,7 +39,6 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -75,29 +72,6 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {self.installed_version})." - ) - raise LegacyBigQueryStorageError(msg) - BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..817930ddd 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,18 +20,13 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.parquet -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.parquet try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -106,63 +101,52 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. 
- BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER +# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py +# When modifying it be sure to update it there as well. +BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, +} +ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. 
+ pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", +} def bq_to_arrow_struct_data_type(field): @@ -346,13 +330,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: - if not pyarrow: - msg = u"Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -494,9 +471,6 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") - bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 742ecac2e..acae2fe36 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,19 +27,11 @@ import json import math import os -import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings -try: - import pyarrow - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload @@ -53,18 +45,14 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error -try: - from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, - ) -except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None +from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, +) from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -72,7 +60,6 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -121,9 +108,6 @@ # 
https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - class Project(object): """Wrapper for resource describing a BigQuery project. @@ -483,17 +467,10 @@ def _ensure_bqstorage_client( ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to a missing or outdated dependency - `google-cloud-bigquery-storage`, raise a warning and return ``None``. - - If the `bqstorage_client` argument is not ``None``, still perform the version - check and return the argument back to the caller if the check passes. If it - fails, raise a warning and return ``None``. - Args: bqstorage_client: - An existing BigQuery Storage client instance to check for version - compatibility. If ``None``, a new instance is created and returned. + An existing BigQuery Storage client instance. If ``None``, a new + instance is created and returned. client_options: Custom options used with a new BigQuery Storage client instance if one is created. @@ -504,20 +481,7 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - try: - from google.cloud import bigquery_storage - except ImportError: - warnings.warn( - "Cannot create BigQuery Storage client, the dependency " - "google-cloud-bigquery-storage is not installed." - ) - return None - - try: - BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return None + from google.cloud import bigquery_storage if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( @@ -2496,7 +2460,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. - Indexes are not loaded. Requires the :mod:`pyarrow` library. + Indexes are not loaded. By default, this method uses the parquet source format. To override this, supply a value for @@ -2526,9 +2490,6 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ValueError: - If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2556,10 +2517,6 @@ def load_table_from_dataframe( ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: - # pyarrow is now the only supported parquet engine. - raise ValueError("This method requires pyarrow to be installed") - if location is None: location = self.location @@ -2615,16 +2572,6 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. 
" - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py deleted file mode 100644 index 6e5c27eb1..000000000 --- a/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class BigQueryError(Exception): - """Base class for all custom exceptions defined by the BigQuery client.""" - - -class LegacyBigQueryStorageError(BigQueryError): - """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..f4a78a9da 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1342,7 +1342,7 @@ def result( def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, ) -> "pyarrow.Table": @@ -1373,8 +1373,7 @@ def to_arrow( BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. Reading from a specific partition or snapshot is not currently supported by this method. @@ -1399,10 +1398,6 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1417,7 +1412,7 @@ def to_arrow( # that should only exist here in the QueryJob method. def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..dce911232 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -766,17 +766,6 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError as err: - customized_error = ImportError( - "The default BigQuery Storage API client cannot be used, install " - "the missing google-cloud-bigquery-storage and pyarrow packages " - "to use it. 
Alternatively, use the classic REST API by specifying " - "the --use_rest_api magic option." - ) - raise customized_error from err - try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..a058dca91 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -30,10 +30,7 @@ except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -41,7 +38,6 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -50,21 +46,17 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud import bigquery_storage + # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import pyarrow - from google.cloud import bigquery_storage _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) -_NO_PYARROW_ERROR = ( - "The pyarrow library is not installed, please install " - "pyarrow to use the to_arrow() function." -) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1559,17 +1551,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: - return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return False - return True def _get_next_page_response(self): @@ -1641,7 +1622,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a @@ -1670,8 +1651,7 @@ def to_arrow( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1692,14 +1672,8 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: If the :mod:`pyarrow` library cannot be imported. - .. 
versionadded:: 1.17.0 """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1743,7 +1717,7 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, ) -> "pandas.DataFrame": @@ -1754,8 +1728,7 @@ def to_dataframe_iterable( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1818,7 +1791,7 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -1831,8 +1804,7 @@ def to_dataframe( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1886,9 +1858,7 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported. """ if pandas is None: @@ -1974,8 +1944,6 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/setup.py b/setup.py index 0ca19b576..91458bb78 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.29.0, <3.0.0dev", "proto-plus >= 1.10.0", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -42,22 +43,14 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "pyarrow >= 3.0.0, < 5.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. 
- # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", - ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + # Keep the no-op bqstorage extra for backward compatibility. + # See: https://github.com/googleapis/python-bigquery/issues/757 + "bqstorage": [], + "pandas": ["pandas>=0.23.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", @@ -69,11 +62,6 @@ all_extras = [] for extra in extras: - # Exclude this extra from all to avoid overly strict dependencies on core - # libraries such as pyarrow. - # https://github.com/googleapis/python-bigquery/issues/563 - if extra in {"bignumeric_type"}: - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..4970ef281 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,24 +30,15 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - try: import fastavro # to parse BQ storage client results except ImportError: # pragma: NO COVER fastavro = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.types from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest @@ -66,6 +57,7 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums +from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient @@ -1602,10 +1594,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1664,9 +1652,6 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -1869,6 +1854,11 @@ def test_query_w_query_params(self): "expected": pi_numeric, "query_parameters": [pi_numeric_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, { "sql": "SELECT @truthy", "expected": truthy, @@ -1939,14 +1929,6 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] - if _BIGNUMERIC_SUPPORT: - 
examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() @@ -2333,10 +2315,6 @@ def test_create_table_rows_fetch_nested_schema(self): def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..4b1828c86 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -26,15 +26,11 @@ import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from google.cloud import bigquery_storage from . import helpers -bigquery_storage = pytest.importorskip( - "google.cloud.bigquery_storage", minversion="2.0.0" -) pandas = pytest.importorskip("pandas", minversion="0.23.0") -pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version @@ -184,12 +180,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -211,12 +206,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -292,12 +286,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -335,6 +328,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -349,17 +350,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - 
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..e5105974f 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,20 +17,16 @@ import json import mock +import pyarrow import pytest try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None + +from google.cloud import bigquery_storage + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -83,9 +79,6 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "query", ( @@ -151,7 +144,6 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -238,7 +230,6 @@ def test_to_arrow(): ] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -273,7 +264,6 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): from google.cloud.bigquery import table @@ -330,7 +320,6 @@ def test_to_arrow_w_tqdm_w_query_plan(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_pending_status(): from google.cloud.bigquery import table @@ -383,7 +372,6 @@ def test_to_arrow_w_tqdm_w_pending_status(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_wo_query_plan(): from google.cloud.bigquery import table @@ -485,9 +473,6 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -538,9 +523,6 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -643,7 +625,6 @@ def test_to_dataframe_column_dtypes(): assert df.date.dtype.name == "object" -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(pandas is None, reason="Requires 
`pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..aaafdb0f7 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,50 +19,13 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def _object_under_test(self): from google.cloud.bigquery import _helpers return _helpers.BQStorageVersions() - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): - self._call_fut() - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..85c507b2a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -29,27 +29,18 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. 
- pyarrow = mock.Mock() + +import pyarrow +import pyarrow.types import pytest import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -try: - from google.cloud import bigquery_storage +from google.cloud import bigquery_storage - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -60,11 +51,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -120,7 +106,6 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -153,9 +138,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +217,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -294,7 +276,6 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -302,7 +283,6 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -312,6 +292,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -321,9 +302,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +313,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), 
pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +322,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -353,7 +330,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -363,6 +339,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +349,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +360,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +369,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -404,7 +377,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -441,7 +413,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,7 +421,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, 
None, False, None]), ("BOOL", [False, None, True, None]), @@ -502,7 +473,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -537,7 +507,6 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -548,7 +517,6 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -560,7 +528,6 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -582,7 +549,6 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -599,7 +565,6 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -861,7 +826,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( schema.SchemaField("str_index", "STRING"), @@ -928,7 +892,6 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -938,6 +901,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", 
mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +910,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,6 +919,10 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ @@ -971,11 +937,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -987,7 +948,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1020,7 +980,6 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1042,15 +1001,6 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): - monkeypatch.setattr(module_under_test, "pyarrow", None) - with pytest.raises(ValueError) as exc_context: - module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - assert "pyarrow is required" in str(exc_context.value) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1062,7 +1012,6 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1074,7 +1023,6 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = 
(schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1094,34 +1042,6 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): - dataframe = pandas.DataFrame( - data=[ - {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, - ] - ) - - no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) - - with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: - detected_schema = module_under_test.dataframe_to_bq_schema( - dataframe, bq_schema=[] - ) - - assert detected_schema is None - - # a warning should also be issued - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - msg = str(expected_warnings[0]) - assert "execution_date" in msg and "created_at" in msg - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1151,7 +1071,6 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1176,7 +1095,6 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1210,11 +1128,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,20 +1151,16 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def 
test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1285,8 +1196,9 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): + pandas = pytest.importorskip("pandas") + dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, @@ -1314,9 +1226,6 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1347,8 +1256,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1388,9 +1296,6 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1440,7 +1345,6 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1476,7 +1380,6 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1511,7 +1414,6 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1539,7 +1441,6 @@ def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 535685511..0dc9c3f55 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,7 +27,6 @@ import warnings import mock -import packaging import requests import pytest import pytz @@ -47,22 +46,14 @@ ) except (ImportError, AttributeError): # pragma: NO COVER opentelemetry = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None import 
google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers +from google.cloud import bigquery_storage from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -605,9 +596,6 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -630,55 +618,6 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) - def test_ensure_bqstorage_client_missing_dependency(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning - for warning in warned - if "not installed" in str(warning) - and "google-cloud-bigquery-storage" in str(warning) - ] - assert matching_warnings, "Missing dependency warning not raised." - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." 
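
The tests removed here, like the `_ensure_bqstorage_client` tests that remain, lean on the `warnings.catch_warnings(record=True)` idiom to assert that a specific warning was (or was not) emitted. A minimal, self-contained sketch of that idiom, using a hypothetical `outdated_dependency_check` stand-in rather than the real client:

    import warnings

    def outdated_dependency_check():
        # Hypothetical stand-in for client code that warns about an old dependency.
        warnings.warn("BQ Storage too old", category=UserWarning)
        return None

    def test_obsolete_dependency_warning_is_raised():
        with warnings.catch_warnings(record=True) as warned:
            warnings.simplefilter("always")  # record everything, even once-only warnings
            result = outdated_dependency_check()

        assert result is None
        matching_warnings = [
            warning for warning in warned if "BQ Storage too old" in str(warning.message)
        ]
        assert matching_warnings, "Obsolete dependency warning not raised."
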
- - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -690,29 +629,6 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - mock_storage_client = mock.sentinel.mock_storage_client - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -6679,7 +6595,6 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6766,7 +6681,6 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6811,7 +6725,6 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6866,7 +6779,6 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6922,7 +6834,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -6942,7 
+6853,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7019,7 +6929,65 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + + df_data = [ + [[{"name": "n1.1", "value": 1.1}, {"name": "n1.2", "value": 1.2}]], + [[{"name": "n2.1", "value": 2.1}, {"name": "n2.2", "value": 2.2}]], + ] + dataframe = pandas.DataFrame(df_data, columns=["col_record_list"]) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + # There should be a warning that schema detection failed. + expected_warnings = [ + warning + for warning in warned + if "schema could not be detected" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass( + expected_warnings[0].category, + (DeprecationWarning, PendingDeprecationWarning), + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=None, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema is None + + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7081,7 +7049,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7120,7 +7087,6 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7168,7 +7134,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - 
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7213,7 +7178,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7273,7 +7237,6 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7357,7 +7320,6 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7394,63 +7356,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_partial_schema_missing_types(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - df_data = collections.OrderedDict( - [ - ("string_col", ["abc", "def", "ghi"]), - ("unknown_col", [b"jkl", None, b"mno"]), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - - schema = (SchemaField("string_col", "STRING"),) - job_config = job.LoadJobConfig(schema=schema) - with pyarrow_patch, load_patch as load_table_from_file, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - size=mock.ANY, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - timeout=None, - ) - - assert warned # there should be at least one warning - unknown_col_warnings = [ - warning for warning in warned if "unknown_col" in str(warning) - ] - assert unknown_col_warnings - assert unknown_col_warnings[0].category == UserWarning - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema is None - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7483,72 +7388,6 @@ def 
test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - to_parquet_patch = mock.patch.object( - dataframe, "to_parquet", wraps=dataframe.to_parquet - ) - - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: - with pytest.raises(ValueError): - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) - - def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): - pytest.importorskip("pandas", reason="Requires `pandas`") - pytest.importorskip("pyarrow", reason="Requires `pyarrow`") - - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", - packaging.version.parse("2.0.0"), # A known bad version of pyarrow. - ) - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - - with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION, - ) - - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. 
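
The `load_table_from_dataframe` tests in this file share one pattern: `Client.get_table` and `Client.load_table_from_file` are patched with `autospec=True`, and a `NotFound` side effect pushes the client down the schema-autodetect path. A reduced sketch of that pattern with stand-in `FakeClient` and `NotFound` classes (not the real BigQuery client):

    from unittest import mock

    class NotFound(Exception):
        """Stand-in for google.api_core.exceptions.NotFound."""

    class FakeClient:
        def get_table(self, table_ref):
            return {"ref": table_ref}

        def load_dataframe(self, table_ref):
            try:
                self.get_table(table_ref)
                return "use existing schema"
            except NotFound:
                return "autodetect schema"

    def test_load_falls_back_to_schema_autodetect():
        client = FakeClient()
        get_table_patch = mock.patch.object(
            FakeClient, "get_table", autospec=True, side_effect=NotFound("Table not found")
        )
        with get_table_patch as get_table:
            assert client.load_dataframe("proj.dataset.table") == "autodetect schema"
        # With autospec, the recorded call includes the bound instance, mirroring the
        # load_table_from_file.assert_called_once_with(client, ...) checks above.
        get_table.assert_called_once_with(client, "proj.dataset.table")
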
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index b33203354..4afc47b6c 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,11 +21,6 @@ import pytest -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers @@ -215,7 +210,6 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 0576cad38..6b3a99439 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -17,10 +17,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage class TestConnection(unittest.TestCase): @@ -40,8 +37,6 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - # Assumption: bigquery_storage exists. It's the test's responisbility to - # not use this helper or skip itself if bqstroage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -58,9 +53,6 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -89,9 +81,6 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -107,9 +96,6 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -142,9 +128,6 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -165,9 +148,6 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 026810aaf..f075bb6f7 100644 --- 
a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -18,18 +18,8 @@ import pytest - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions - -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage from tests.unit.helpers import _to_pyarrow @@ -279,10 +269,6 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -334,9 +320,6 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -358,9 +341,6 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -390,10 +370,6 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..bb3a8d1fd 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -76,19 +76,6 @@ def ipython_ns_cleanup(): del ip.user_ns[name] -@pytest.fixture(scope="session") -def missing_bq_storage(): - """Provide a patcher that can make the bigquery storage import to fail.""" - - def fail_if(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - return maybe_fail_import(predicate=fail_if) - - @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -324,9 +311,6 @@ def test__make_bqstorage_client_false(): assert got is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -338,53 +322,6 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) -def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - with pytest.raises(ImportError) as 
exc_context, missing_bq_storage: - magics._make_bqstorage_client(test_client, True, {}) - - error_msg = str(exc_context.value) - assert "google-cloud-bigquery-storage" in error_msg - assert "pyarrow" in error_msg - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - got = magics._make_bqstorage_client(test_client, True, {}) - - assert got is None - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -440,9 +377,6 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -603,10 +537,9 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -667,10 +600,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_rest_client_requested(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -887,9 +819,6 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..533ed610f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -19,36 +19,23 @@ import warnings import mock -import pkg_resources +import pyarrow +import pyarrow.types import pytest import pytz import google.api_core.exceptions -from test_utils.imports import maybe_fail_import -try: - from 
google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None +from google.cloud import bigquery_storage +from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, +) try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") - try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -57,9 +44,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -1619,13 +1603,6 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_error_if_pyarrow_is_none(self): - row_iterator = self._make_one() - with self.assertRaises(ValueError): - row_iterator.to_arrow() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -1862,49 +1839,6 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): - iterator = self._make_one(first_page_response=None) # not cached - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - iterator = self._make_one(first_page_response=None) # not cached - - patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." 
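
With pyarrow and google-cloud-bigquery-storage now imported unconditionally in this module, only the genuinely optional extras (pandas, tqdm) still need guards. The patch uses both guard forms: `skipIf`/`skipif` decorators where a module-level import guard survives, and `pytest.importorskip` inside individual tests. A small sketch of the latter idiom (the `test_to_dataframe_roundtrip` test itself is hypothetical):

    import pytest

    def test_to_dataframe_roundtrip():
        pandas = pytest.importorskip("pandas", reason="Requires `pandas`")

        df = pandas.DataFrame({"name": ["Donkey", "Diddy"], "age": [32, 29]})
        assert list(df.columns) == ["name", "age"]
        assert df["age"].sum() == 61
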
- - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -1986,7 +1920,6 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2019,7 +1952,6 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2062,7 +1994,6 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2101,10 +2032,6 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2141,10 +2068,6 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2222,10 +2145,6 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2253,7 +2172,6 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2280,10 +2198,6 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2320,7 +2234,6 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.tqdm_notebook") @@ -2456,10 +2369,6 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2573,7 +2482,6 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -2588,9 +2496,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows @@ -2604,7 +2510,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -2963,9 +2868,6 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2994,9 +2896,6 @@ def test_to_dataframe_w_bqstorage_creates_client(self): 
bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3022,11 +2921,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3048,10 +2943,6 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3103,10 +2994,6 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3183,10 +3070,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3237,11 +3120,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema @@ -3316,10 +3195,6 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3435,9 +3310,6 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from 
google.cloud.bigquery import table as mut @@ -3454,9 +3326,6 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3474,9 +3343,6 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3495,10 +3361,6 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3983,9 +3845,6 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", (