Skip to content

Commit

Permalink
fix: support converting empty time Series to pyarrow Array (#11)
Browse files: browse the repository at this point in the history
* fix: support converting empty `time` Series to pyarrow Array

* use object dtype for time numpy array

* backport to_numpy

* remove redundant test
  • Loading branch information
tswast committed Sep 29, 2021
1 parent c65ff18 commit 7675b15
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 12 deletions.
3 changes: 2 additions & 1 deletion db_dtypes/__init__.py
Expand Up @@ -125,7 +125,8 @@ def to_numpy(self, dtype="object"):

def __arrow_array__(self, type=None):
    """Convert this extension array to a ``pyarrow`` array.

    This is the hook ``pyarrow.array()`` invokes when handed an object
    implementing the ``__arrow_array__`` protocol.

    Args:
        type: Optional pyarrow data type requested by the caller. When
            ``None``, ``pyarrow.time64("ns")`` is used.

    Returns:
        The ``pyarrow`` array built from this array's values.
    """
    # The parameter name ``type`` shadows the builtin, but it is part of the
    # protocol's calling convention and must not be renamed.
    arrow_type = type if type is not None else pyarrow.time64("ns")

    # Ask for an object-dtype numpy array explicitly rather than relying on
    # ``to_numpy``'s default. Per the accompanying commit message, using the
    # object dtype is what allows an empty time Series to be converted.
    return pyarrow.array(self.to_numpy(dtype="object"), type=arrow_type)


Expand Down
163 changes: 163 additions & 0 deletions tests/unit/test_arrow.py
@@ -0,0 +1,163 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime as dt

import pandas
import pyarrow
import pytest

# To register the types.
import db_dtypes # noqa


@pytest.mark.parametrize(
    ("series", "expected"),
    [
        # --- "date" dtype: expected default arrow type is date32 ---
        # Empty series.
        (
            pandas.Series([], dtype="date"),
            pyarrow.array([], type=pyarrow.date32()),
        ),
        # All-null series.
        (
            pandas.Series([None, None, None], dtype="date"),
            pyarrow.array([None, None, None], type=pyarrow.date32()),
        ),
        # Mix of values and a null.
        (
            pandas.Series(
                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
            ),
            pyarrow.array(
                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
                type=pyarrow.date32(),
            ),
        ),
        # Widely spread dates (presumably chosen near the limits of the
        # nanosecond-precision timestamp range -- TODO confirm).
        (
            pandas.Series(
                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
                dtype="date",
            ),
            pyarrow.array(
                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
                type=pyarrow.date32(),
            ),
        ),
        # --- "time" dtype: expected default arrow type is time64("ns") ---
        # Empty series.
        (
            pandas.Series([], dtype="time"),
            pyarrow.array([], type=pyarrow.time64("ns")),
        ),
        # All-null series.
        (
            pandas.Series([None, None, None], dtype="time"),
            pyarrow.array([None, None, None], type=pyarrow.time64("ns")),
        ),
        # Mix of values and a null.
        (
            pandas.Series(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
                dtype="time",
            ),
            pyarrow.array(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
                type=pyarrow.time64("ns"),
            ),
        ),
        # Values only, no nulls.
        (
            pandas.Series(
                [
                    dt.time(0, 0, 0, 0),
                    dt.time(12, 30, 15, 125_000),
                    dt.time(23, 59, 59, 999_999),
                ],
                dtype="time",
            ),
            pyarrow.array(
                [
                    dt.time(0, 0, 0, 0),
                    dt.time(12, 30, 15, 125_000),
                    dt.time(23, 59, 59, 999_999),
                ],
                type=pyarrow.time64("ns"),
            ),
        ),
    ],
)
def test_to_arrow(series, expected):
    """Check ``pyarrow.array(series)`` with no explicit arrow type.

    Each case pairs a pandas Series using the "date" or "time" dtype with
    the pyarrow array it is expected to convert to.
    """
    converted = pyarrow.array(series)
    assert converted.equals(expected)


@pytest.mark.parametrize(
    ("series", "expected"),
    [
        # --- "date" dtype converted to date64 ---
        # Empty series.
        (
            pandas.Series([], dtype="date"),
            pyarrow.array([], type=pyarrow.date64()),
        ),
        # All-null series.
        (
            pandas.Series([None, None, None], dtype="date"),
            pyarrow.array([None, None, None], type=pyarrow.date64()),
        ),
        # Mix of values and a null.
        (
            pandas.Series(
                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
            ),
            pyarrow.array(
                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
                type=pyarrow.date64(),
            ),
        ),
        # Widely spread dates, no nulls.
        (
            pandas.Series(
                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
                dtype="date",
            ),
            pyarrow.array(
                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
                type=pyarrow.date64(),
            ),
        ),
        # --- "time" dtype converted to time32("ms") ---
        # Empty series.
        (
            pandas.Series([], dtype="time"),
            pyarrow.array([], type=pyarrow.time32("ms")),
        ),
        # All-null series.
        (
            pandas.Series([None, None, None], dtype="time"),
            pyarrow.array([None, None, None], type=pyarrow.time32("ms")),
        ),
        # Values whose microsecond part is a whole number of milliseconds.
        (
            pandas.Series(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
                dtype="time",
            ),
            pyarrow.array(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
                type=pyarrow.time32("ms"),
            ),
        ),
        # --- "time" dtype converted to time64("us") ---
        # Mix of values and a null.
        (
            pandas.Series(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
                dtype="time",
            ),
            pyarrow.array(
                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
                type=pyarrow.time64("us"),
            ),
        ),
        # Values only, no nulls.
        (
            pandas.Series(
                [
                    dt.time(0, 0, 0, 0),
                    dt.time(12, 30, 15, 125_000),
                    dt.time(23, 59, 59, 999_999),
                ],
                dtype="time",
            ),
            pyarrow.array(
                [
                    dt.time(0, 0, 0, 0),
                    dt.time(12, 30, 15, 125_000),
                    dt.time(23, 59, 59, 999_999),
                ],
                type=pyarrow.time64("us"),
            ),
        ),
    ],
)
def test_to_arrow_w_arrow_type(series, expected):
    """Check ``pyarrow.array(series, type=...)`` with an explicit arrow type.

    The requested type is taken from ``expected`` itself, so each case
    asserts that the Series converts to exactly that array.
    """
    requested_type = expected.type
    converted = pyarrow.array(series, type=requested_type)
    assert converted.equals(expected)
11 changes: 0 additions & 11 deletions tests/unit/test_dtypes.py
Expand Up @@ -15,7 +15,6 @@
import datetime

import packaging.version
import pyarrow.lib
import pytest

pd = pytest.importorskip("pandas")
Expand Down Expand Up @@ -670,13 +669,3 @@ def test_bad_time_parsing(value, error):
def test_bad_date_parsing(value, error):
    """Building a "date" array from a bad ``value`` raises ``ValueError``.

    The exception message must match the ``error`` pattern. Both arguments
    are presumably supplied by a parametrize decorator that lies outside
    the visible part of the file.
    """
    expect_value_error = pytest.raises(ValueError, match=error)
    with expect_value_error:
        _cls("date")([value])


@for_date_and_time
def test_date___arrow__array__(dtype):
    """Check the type and contents returned by ``__arrow_array__``.

    For the "date" dtype the result must be a ``pyarrow.Date32Array``;
    otherwise it must be a ``pyarrow.Time64Array``. In both cases the
    converted values must equal the source array's elements.
    """
    source = _make_one(dtype)
    arrow_result = source.__arrow_array__()

    if dtype == "date":
        expected_cls = pyarrow.Date32Array
    else:
        expected_cls = pyarrow.Time64Array

    assert isinstance(arrow_result, expected_cls)
    assert [item.as_py() for item in arrow_result] == list(source)

0 comments on commit 7675b15

Please sign in to comment.