Skip to content

Commit

Permalink
deps: expand pyarrow dependencies to include version 2 (#368)
Browse files Browse the repository at this point in the history
Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API.
  • Loading branch information
tswast committed Nov 10, 2020
1 parent 30de15f commit cd9febd
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -46,12 +46,12 @@
# grpc.Channel.close() method isn't added until 1.32.0.
# https://github.com/grpc/grpc/pull/15254
"grpcio >= 1.32.0, < 2.0dev",
"pyarrow >= 1.0.0, < 2.0dev",
"pyarrow >= 1.0.0, < 3.0dev",
],
"pandas": [
"pandas>=0.23.0",
# pyarrow 1.0.0 is required for the use of timestamp_as_object keyword.
"pyarrow >= 1.0.0, < 2.0dev",
"pyarrow >= 1.0.0, < 3.0dev",
],
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
"opentelemetry": [
Expand Down
16 changes: 15 additions & 1 deletion tests/unit/test_table.py
Expand Up @@ -19,6 +19,7 @@
import warnings

import mock
import pkg_resources
import pytest
import six

Expand All @@ -41,8 +42,11 @@
try:
    # pyarrow is an optional dependency; tests that need it are skipped when
    # it is absent.
    import pyarrow
    import pyarrow.types
except ImportError:  # pragma: NO COVER
    pyarrow = None
    # Sentinel version lower than any real release, so version comparisons
    # below are always valid even without pyarrow installed.
    PYARROW_VERSION = pkg_resources.parse_version("0.0.1")
else:
    PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)

try:
from tqdm import tqdm
Expand All @@ -52,6 +56,9 @@
from google.cloud.bigquery.dataset import DatasetReference


PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")


def _mock_client():
from google.cloud.bigquery import client

Expand Down Expand Up @@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):

df = row_iterator.to_dataframe(create_bqstorage_client=False)

tzinfo = None
if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION:
tzinfo = dt.timezone.utc

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 2) # verify the number of rows
self.assertEqual(list(df.columns), ["some_timestamp"])
self.assertEqual(
list(df["some_timestamp"]),
[dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)],
[
dt.datetime(4567, 1, 1, tzinfo=tzinfo),
dt.datetime(9999, 12, 31, tzinfo=tzinfo),
],
)

@pytest.mark.xfail(
Expand Down

2 comments on commit cd9febd

@emkornfield
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: there is an issue with Arrow 2.0 when writing nested Parquet data. We are discussing a patch release: ARROW-10493.

@tswast
Copy link
Contributor Author

@tswast tswast commented on cd9febd Nov 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the heads up. We don't officially support nested data yet, so we didn't have any system / samples tests fail for this.

Please sign in to comment.