From 1be66ce94a32b1f924bdda05d068c2977631af9e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Sep 2020 13:04:03 -0500 Subject: [PATCH] fix: update minimum dependency versions (#263) This PR updates the minimum dependency versions to match those that I found to be actually runnable. Updates tests to use constraint files so that at least one test session uses these minimum versions. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! - Based on internal Python Client Library Testing Improvements docs. - In response to internal bug 166792569 covering insufficient pyarrow minimum dependency. - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- noxfile.py | 74 ++++++++++++------- samples/snippets/jupyter_tutorial_test.py | 8 +- samples/tests/test_download_public_data.py | 4 + .../test_download_public_data_sandbox.py | 4 + samples/tests/test_query_to_arrow.py | 4 +- setup.py | 23 +++--- testing/constraints-2.7.txt | 9 +++ testing/constraints-3.5.txt | 12 +++ testing/constraints-3.6.txt | 0 testing/constraints-3.7.txt | 0 testing/constraints-3.8.txt | 0 tests/system.py | 19 +++-- tests/unit/test__pandas_helpers.py | 20 +++++ tests/unit/test_client.py | 2 +- tests/unit/test_dbapi_connection.py | 14 +++- tests/unit/test_magics.py | 17 ++--- tests/unit/test_table.py | 12 ++- 17 files changed, 155 insertions(+), 67 deletions(-) create mode 100644 testing/constraints-2.7.txt create mode 100644 testing/constraints-3.5.txt create mode 100644 testing/constraints-3.6.txt create mode 100644 testing/constraints-3.7.txt create mode 100644 testing/constraints-3.8.txt diff --git a/noxfile.py b/noxfile.py index 7f2dee34c..90f023add 100644 --- a/noxfile.py 
+++ b/noxfile.py @@ -14,6 +14,7 @@ from __future__ import absolute_import +import pathlib import os import shutil @@ -22,6 +23,7 @@ BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") +CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() def default(session): @@ -32,27 +34,33 @@ def default(session): Python corresponding to the ``nox`` binary the ``PATH`` can run the tests. """ + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Install all test dependencies, then install local packages in-place. session.install( - "mock", "pytest", "google-cloud-testutils", "pytest-cov", "freezegun" + "mock", + "pytest", + "google-cloud-testutils", + "pytest-cov", + "freezegun", + "-c", + constraints_path, ) - session.install("grpcio") - - # fastparquet is not included in .[all] because, in general, it's redundant - # with pyarrow. We still want to run some unit tests with fastparquet - # serialization, though. - session.install("-e", ".[all,fastparquet]") - # IPython does not support Python 2 after version 5.x if session.python == "2.7": - session.install("ipython==5.5") + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas,pyarrow]", "-c", constraints_path) + elif session.python == "3.5": + session.install("-e", ".[all]", "-c", constraints_path) else: - session.install("ipython") + # fastparquet is not included in .[all] because, in general, it's + # redundant with pyarrow. We still want to run some unit tests with + # fastparquet serialization, though. + session.install("-e", ".[all,fastparquet]", "-c", constraints_path) - # opentelemetry was not added to [all] because opentelemetry does not support Python 2. 
- # Exporter does not need to be in nox thus it has been added to README documentation - if session.python != "2.7": - session.install("-e", ".[opentelemetry]") + session.install("ipython", "-c", constraints_path) # Run py.test against the unit tests. session.run( @@ -79,6 +87,10 @@ def unit(session): def system(session): """Run the system test suite.""" + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": session.skip("RUN_SYSTEM_TESTS is set to false, skipping") @@ -88,18 +100,21 @@ def system(session): session.skip("Credentials must be set via environment variable.") # Use pre-release gRPC for system tests. - session.install("--pre", "grpcio") + session.install("--pre", "grpcio", "-c", constraints_path) # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "psutil", "google-cloud-testutils") - session.install("google-cloud-storage") - session.install("-e", ".[all]") + session.install( + "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + ) + session.install("google-cloud-storage", "-c", constraints_path) - # IPython does not support Python 2 after version 5.x if session.python == "2.7": - session.install("ipython==5.5") + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas]", "-c", constraints_path) else: - session.install("ipython") + session.install("-e", ".[all]", "-c", constraints_path) + + session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. session.run( @@ -111,15 +126,24 @@ def system(session): def snippets(session): """Run the snippets test suite.""" + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Sanity check: Only run snippets tests if the environment variable is set. 
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "google-cloud-testutils") - session.install("google-cloud-storage") - session.install("grpcio") - session.install("-e", ".[all]") + session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) + session.install("google-cloud-storage", "-c", constraints_path) + session.install("grpcio", "-c", constraints_path) + + if session.python == "2.7": + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas]", "-c", constraints_path) + else: + session.install("-e", ".[all]", "-c", constraints_path) # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session diff --git a/samples/snippets/jupyter_tutorial_test.py b/samples/snippets/jupyter_tutorial_test.py index 353590b82..7fe1cde85 100644 --- a/samples/snippets/jupyter_tutorial_test.py +++ b/samples/snippets/jupyter_tutorial_test.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import IPython -from IPython.terminal import interactiveshell -from IPython.testing import tools -import matplotlib import pytest +IPython = pytest.importorskip("IPython") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +tools = pytest.importorskip("IPython.testing.tools") +matplotlib = pytest.importorskip("matplotlib") # Ignore semicolon lint warning because semicolons are used in notebooks # flake8: noqa E703 diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py index 82297b203..2412c147f 100644 --- a/samples/tests/test_download_public_data.py +++ b/samples/tests/test_download_public_data.py @@ -14,8 +14,12 @@ import logging +import pytest + from .. import download_public_data +pytest.importorskip("google.cloud.bigquery_storage_v1") + def test_download_public_data(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py index e322cb2e5..08e1aab73 100644 --- a/samples/tests/test_download_public_data_sandbox.py +++ b/samples/tests/test_download_public_data_sandbox.py @@ -14,8 +14,12 @@ import logging +import pytest + from .. import download_public_data_sandbox +pytest.importorskip("google.cloud.bigquery_storage_v1") + def test_download_public_data_sandbox(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. diff --git a/samples/tests/test_query_to_arrow.py b/samples/tests/test_query_to_arrow.py index 77d3f7130..f14ce5561 100644 --- a/samples/tests/test_query_to_arrow.py +++ b/samples/tests/test_query_to_arrow.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pyarrow +import pytest from .. 
import query_to_arrow +pyarrow = pytest.importorskip("pyarrow") + def test_query_to_arrow(capsys,): diff --git a/setup.py b/setup.py index d23f77b1c..6e66ae4cd 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.5.0, < 2.0dev", + "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] extras = { @@ -41,18 +41,19 @@ # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 - "grpcio >= 1.8.2, < 2.0dev", - "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.32.0, < 2.0dev", + "pyarrow >= 1.0.0, < 2.0dev", ], - "pandas": ["pandas>=0.17.1"], - # Exclude PyArrow dependency from Windows Python 2.7. + "pandas": ["pandas>=0.23.0"], "pyarrow": [ - "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", - # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. - "pyarrow < 0.17.0; python_version < '3.0' and platform_system != 'Windows'", + # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. + "pyarrow >= 1.0.0, < 2.0dev ; python_version>='3.5'", + "pyarrow >= 0.16.0, < 0.17.0dev ; python_version<'3.5'", ], - "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], + "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "fastparquet": [ "fastparquet", "python-snappy", @@ -77,8 +78,6 @@ # creates a dependency on pre-release versions of numpy. See: # https://github.com/googleapis/google-cloud-python/issues/8549 "fastparquet", - # Skip opentelemetry because the library is not compatible with Python 2.
- "opentelemetry", ): continue all_extras.extend(extras[extra]) diff --git a/testing/constraints-2.7.txt b/testing/constraints-2.7.txt new file mode 100644 index 000000000..fafbaa27f --- /dev/null +++ b/testing/constraints-2.7.txt @@ -0,0 +1,9 @@ +google-api-core==1.21.0 +google-cloud-core==1.4.1 +google-cloud-storage==1.30.0 +google-resumable-media==0.6.0 +ipython==5.5 +pandas==0.23.0 +pyarrow==0.16.0 +six==1.13.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/testing/constraints-3.5.txt b/testing/constraints-3.5.txt new file mode 100644 index 000000000..a262dbe5f --- /dev/null +++ b/testing/constraints-3.5.txt @@ -0,0 +1,12 @@ +google-api-core==1.21.0 +google-cloud-bigquery-storage==1.0.0 +google-cloud-core==1.4.1 +google-resumable-media==0.6.0 +google-cloud-storage==1.30.0 +grpcio==1.32.0 +ipython==5.5 +# pandas 0.23.0 is the first version to work with pyarrow to_pandas. +pandas==0.23.0 +pyarrow==1.0.0 +six==1.13.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt new file mode 100644 index 000000000..e69de29bb diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt new file mode 100644 index 000000000..e69de29bb diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/system.py b/tests/system.py index 0048c13e0..02cc8e139 100644 --- a/tests/system.py +++ b/tests/system.py @@ -64,6 +64,7 @@ from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest +from google.api_core.exceptions import ClientError from google.api_core.exceptions import Conflict from google.api_core.exceptions import Forbidden from google.api_core.exceptions import GoogleAPICallError @@ -130,9 +131,17 @@ ) PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version PYARROW_MINIMUM_VERSION = 
pkg_resources.parse_version("0.17.0") -PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version + +if pandas: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = None + +if pyarrow: + PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version +else: + PYARROW_INSTALLED_VERSION = None def _has_rows(result): @@ -1312,9 +1321,9 @@ def test_load_table_from_file_w_explicit_location(self): self.assertEqual("EU", load_job.location) # Cannot cancel the job from the US. - with self.assertRaises(NotFound): + with self.assertRaises(ClientError): client.cancel_job(job_id, location="US") - with self.assertRaises(NotFound): + with self.assertRaises(ClientError): load_job_us.cancel() # Can list the table rows. @@ -2897,7 +2906,7 @@ def test_bigquery_magic(): LIMIT 10 """ with io.capture_output() as captured: - result = ip.run_cell_magic("bigquery", "", sql) + result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) conn_count_end = len(current_process.connections()) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index e229e04a2..f4355072a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -773,6 +773,26 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): assert returned_schema == expected_schema +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(not six.PY2, reason="Requires Python 2.7") +def test_dataframe_to_bq_schema_w_struct_raises_py27(module_under_test): + dataframe = pandas.DataFrame( + data=[{"struct_field": {"int_col": 1}}, {"struct_field": {"int_col": 2}}] + ) + bq_schema = [ + schema.SchemaField( + "struct_field", + field_type="STRUCT", + fields=[schema.SchemaField("int_col", field_type="INT64")], + ), + ] + + with pytest.raises(ValueError) as excinfo: + module_under_test.dataframe_to_bq_schema(dataframe, 
bq_schema=bq_schema) + + assert "struct (record) column types is not supported" in str(excinfo.value) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 00bc47017..c4c604ed0 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -56,7 +56,7 @@ pyarrow = None import google.api_core.exceptions -from google.api_core.gapic_v1 import client_info +from google.api_core import client_info import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 96ec41c51..0f1be45ee 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -41,9 +41,11 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - from google.cloud.bigquery_storage_v1 import client - - mock_client = mock.create_autospec(client.BigQueryReadClient) + if bigquery_storage_v1 is None: + return None + mock_client = mock.create_autospec( + bigquery_storage_v1.client.BigQueryReadClient + ) mock_client.transport = mock.Mock(spec=["channel"]) mock_client.transport.channel = mock.Mock(spec=["close"]) return mock_client @@ -127,6 +129,9 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() + @unittest.skipIf( + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -147,6 +152,9 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client.transport.channel.close.called) + @unittest.skipIf( + bigquery_storage_v1 
is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 73e44f311..c4527c837 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -25,21 +25,10 @@ import pandas except ImportError: # pragma: NO COVER pandas = None -try: - import IPython - from IPython.utils import io - from IPython.testing import tools - from IPython.terminal import interactiveshell -except ImportError: # pragma: NO COVER - IPython = None from google.api_core import exceptions import google.auth.credentials -try: - from google.cloud import bigquery_storage_v1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table @@ -48,7 +37,11 @@ from test_utils.imports import maybe_fail_import -pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +IPython = pytest.importorskip("IPython") +io = pytest.importorskip("IPython.utils.io") +tools = pytest.importorskip("IPython.testing.tools") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +bigquery_storage_v1 = pytest.importorskip("google.cloud.bigquery_storage_v1") @pytest.fixture(scope="session") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 80223e8e1..10bedfee1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2472,7 +2472,10 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe(create_bqstorage_client=False) - self.assertEqual(len(warned), 0) + user_warnings = [ + warning for warning in warned if warning.category is UserWarning + ] + self.assertEqual(len(user_warnings), 0) self.assertEqual(len(df), 4) 
@unittest.skipIf(pandas is None, "Requires `pandas`") @@ -2499,9 +2502,10 @@ def test_to_dataframe_no_tqdm(self): progress_bar_type="tqdm", create_bqstorage_client=False, ) - self.assertEqual(len(warned), 1) - for warning in warned: - self.assertIs(warning.category, UserWarning) + user_warnings = [ + warning for warning in warned if warning.category is UserWarning + ] + self.assertEqual(len(user_warnings), 1) # Even though the progress bar won't show, downloading the dataframe # should still work.