From cd9febd20c34983781386c3bf603e5fca7135695 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 10 Nov 2020 11:48:10 -0600
Subject: [PATCH] deps: expand pyarrow dependencies to include version 2 (#368)

Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API.
---
 setup.py                 |  4 ++--
 tests/unit/test_table.py | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 548ceac09..48c4a7518 100644
--- a/setup.py
+++ b/setup.py
@@ -46,12 +46,12 @@
         # grpc.Channel.close() method isn't added until 1.32.0.
         # https://github.com/grpc/grpc/pull/15254
         "grpcio >= 1.32.0, < 2.0dev",
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "pandas": [
         "pandas>=0.23.0",
         # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword.
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
     "opentelemetry": [
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index e232f32e6..eccc46a7a 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -19,6 +19,7 @@
 import warnings
 
 import mock
+import pkg_resources
 import pytest
 import six
 
@@ -41,8 +42,11 @@
 try:
     import pyarrow
     import pyarrow.types
+
+    PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)
 except ImportError:  # pragma: NO COVER
     pyarrow = None
+    PYARROW_VERSION = pkg_resources.parse_version("0.0.1")
 
 try:
     from tqdm import tqdm
@@ -52,6 +56,9 @@
 from google.cloud.bigquery.dataset import DatasetReference
 
 
+PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
+
+
 def _mock_client():
     from google.cloud.bigquery import client
 
@@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
 
         df = row_iterator.to_dataframe(create_bqstorage_client=False)
 
+        tzinfo = None
+        if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION:
+            tzinfo = dt.timezone.utc
+
         self.assertIsInstance(df, pandas.DataFrame)
         self.assertEqual(len(df), 2)  # verify the number of rows
         self.assertEqual(list(df.columns), ["some_timestamp"])
         self.assertEqual(
             list(df["some_timestamp"]),
-            [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)],
+            [
+                dt.datetime(4567, 1, 1, tzinfo=tzinfo),
+                dt.datetime(9999, 12, 31, tzinfo=tzinfo),
+            ],
         )
 
     @pytest.mark.xfail(