Skip to content

Commit

Permalink
feat: rename dbtime and dbdate dtypes to avoid future conflicts with pandas (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
tswast committed Oct 14, 2021
1 parent cf06ae8 commit 50ea0f7
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 51 deletions.
4 changes: 2 additions & 2 deletions db_dtypes/__init__.py
Expand Up @@ -34,8 +34,8 @@
from db_dtypes import core


date_dtype_name = "date"
time_dtype_name = "time"
date_dtype_name = "dbdate"
time_dtype_name = "dbtime"

pandas_release = packaging.version.parse(pandas.__version__).release

Expand Down
12 changes: 3 additions & 9 deletions samples/snippets/pandas_date_and_time.py
Expand Up @@ -14,16 +14,13 @@


def pandas_date_and_time():
# fmt: off
# [START bigquery_date_create]

import datetime
import pandas as pd
import db_dtypes # noqa import to register dtypes

dates = pd.Series(
[datetime.date(2021, 9, 17), '2021-9-18'],
dtype='date')
dates = pd.Series([datetime.date(2021, 9, 17), "2021-9-18"], dtype="dbdate")

# [END bigquery_date_create]
# [START bigquery_date_as_datetime]
Expand All @@ -33,7 +30,7 @@ def pandas_date_and_time():
# [END bigquery_date_as_datetime]
# [START bigquery_date_sub]

dates2 = pd.Series(['2021-1-1', '2021-1-2'], dtype='date')
dates2 = pd.Series(["2021-1-1", "2021-1-2"], dtype="dbdate")
diffs = dates - dates2

# [END bigquery_date_sub]
Expand All @@ -46,9 +43,7 @@ def pandas_date_and_time():
# [END bigquery_date_do]
# [START bigquery_time_create]

times = pd.Series(
[datetime.time(1, 2, 3, 456789), '12:00:00.6'],
dtype='time')
times = pd.Series([datetime.time(1, 2, 3, 456789), "12:00:00.6"], dtype="dbtime")

# [END bigquery_time_create]
# [START bigquery_time_as_timedelta]
Expand All @@ -67,7 +62,6 @@ def pandas_date_and_time():
combined = dates + times

# [END bigquery_combine2_date_time]
# fmt: on

return (
dates,
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/pandas_date_and_time_test.py
Expand Up @@ -35,7 +35,7 @@ def test_pandas_date_and_time():
combined0,
) = pandas_date_and_time()

assert str(dates.dtype) == "date"
assert str(dates.dtype) == "dbdate"
assert list(dates) == [datetime.date(2021, 9, 17), datetime.date(2021, 9, 18)]

assert np.array_equal(
Expand All @@ -45,7 +45,7 @@ def test_pandas_date_and_time():
assert np.array_equal(after, dates.astype("object") + do)
assert np.array_equal(before, dates.astype("object") - do)

assert str(times.dtype) == "time"
assert str(times.dtype) == "dbtime"
assert list(times) == [
datetime.time(1, 2, 3, 456789),
datetime.time(12, 0, 0, 600000),
Expand Down
43 changes: 26 additions & 17 deletions tests/unit/test_arrow.py
Expand Up @@ -25,14 +25,14 @@
@pytest.mark.parametrize(
("series", "expected"),
(
(pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date32())),
(pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date32())),
(
pandas.Series([None, None, None], dtype="date"),
pandas.Series([None, None, None], dtype="dbdate"),
pyarrow.array([None, None, None], type=pyarrow.date32()),
),
(
pandas.Series(
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate"
),
pyarrow.array(
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
Expand All @@ -42,21 +42,25 @@
(
pandas.Series(
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
dtype="date",
dtype="dbdate",
),
pyarrow.array(
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
type=pyarrow.date32(),
),
),
(pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time64("ns"))),
(
pandas.Series([None, None, None], dtype="time"),
pandas.Series([], dtype="dbtime"),
pyarrow.array([], type=pyarrow.time64("ns")),
),
(
pandas.Series([None, None, None], dtype="dbtime"),
pyarrow.array([None, None, None], type=pyarrow.time64("ns")),
),
(
pandas.Series(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
dtype="dbtime",
),
pyarrow.array(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
Expand All @@ -70,7 +74,7 @@
dt.time(12, 30, 15, 125_000),
dt.time(23, 59, 59, 999_999),
],
dtype="time",
dtype="dbtime",
),
pyarrow.array(
[
Expand All @@ -91,14 +95,14 @@ def test_to_arrow(series, expected):
@pytest.mark.parametrize(
("series", "expected"),
(
(pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date64())),
(pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date64())),
(
pandas.Series([None, None, None], dtype="date"),
pandas.Series([None, None, None], dtype="dbdate"),
pyarrow.array([None, None, None], type=pyarrow.date64()),
),
(
pandas.Series(
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate"
),
pyarrow.array(
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
Expand All @@ -108,21 +112,25 @@ def test_to_arrow(series, expected):
(
pandas.Series(
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
dtype="date",
dtype="dbdate",
),
pyarrow.array(
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
type=pyarrow.date64(),
),
),
(pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time32("ms"))),
(
pandas.Series([None, None, None], dtype="time"),
pandas.Series([], dtype="dbtime"),
pyarrow.array([], type=pyarrow.time32("ms")),
),
(
pandas.Series([None, None, None], dtype="dbtime"),
pyarrow.array([None, None, None], type=pyarrow.time32("ms")),
),
(
pandas.Series(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], dtype="time"
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
dtype="dbtime",
),
pyarrow.array(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
Expand All @@ -131,7 +139,8 @@ def test_to_arrow(series, expected):
),
(
pandas.Series(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
dtype="dbtime",
),
pyarrow.array(
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
Expand All @@ -145,7 +154,7 @@ def test_to_arrow(series, expected):
dt.time(12, 30, 15, 125_000),
dt.time(23, 59, 59, 999_999),
],
dtype="time",
dtype="dbtime",
),
pyarrow.array(
[
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_date.py
Expand Up @@ -38,7 +38,7 @@
],
)
def test_date_parsing(value, expected):
assert pandas.Series([value], dtype="date")[0] == expected
assert pandas.Series([value], dtype="dbdate")[0] == expected


@pytest.mark.parametrize(
Expand All @@ -59,4 +59,4 @@ def test_date_parsing(value, expected):
)
def test_date_parsing_errors(value, error):
with pytest.raises(ValueError, match=error):
pandas.Series([value], dtype="date")
pandas.Series([value], dtype="dbdate")
34 changes: 17 additions & 17 deletions tests/unit/test_dtypes.py
Expand Up @@ -23,39 +23,39 @@
pandas_release = packaging.version.parse(pd.__version__).release

SAMPLE_RAW_VALUES = dict(
date=(datetime.date(2021, 2, 2), "2021-2-3", None),
time=(datetime.time(1, 2, 2), "1:2:3.5", None),
dbdate=(datetime.date(2021, 2, 2), "2021-2-3", None),
dbtime=(datetime.time(1, 2, 2), "1:2:3.5", None),
)
SAMPLE_VALUES = dict(
date=(
dbdate=(
datetime.date(2021, 2, 2),
datetime.date(2021, 2, 3),
datetime.date(2021, 2, 4),
datetime.date(2021, 2, 5),
),
time=(
dbtime=(
datetime.time(1, 2, 2),
datetime.time(1, 2, 3, 500000),
datetime.time(1, 2, 4, 500000),
datetime.time(1, 2, 5, 500000),
),
)
SAMPLE_DT_VALUES = dict(
date=(
dbdate=(
"2021-02-02T00:00:00.000000",
"2021-02-03T00:00:00.000000",
"2021-02-04T00:00:00.000000",
"2021-02-05T00:00:00.000000",
),
time=(
dbtime=(
"1970-01-01T01:02:02.000000",
"1970-01-01T01:02:03.500000",
"1970-01-01T01:02:04.500000",
"1970-01-01T01:02:05.500000",
),
)

for_date_and_time = pytest.mark.parametrize("dtype", ["date", "time"])
for_date_and_time = pytest.mark.parametrize("dtype", ["dbdate", "dbtime"])


def eq_na(a1, a2):
Expand All @@ -72,7 +72,7 @@ def register_dtype():
def _cls(dtype):
import db_dtypes

return getattr(db_dtypes, dtype.capitalize() + "Array")
return getattr(db_dtypes, dtype[2:].capitalize() + "Array")


def _make_one(dtype):
Expand Down Expand Up @@ -322,7 +322,7 @@ def test_take(dtype, allow_fill, fill_value):
if fill_value == 42:
fill_value = expected_fill = (
datetime.date(1971, 4, 2)
if dtype == "date"
if dtype == "dbdate"
else datetime.time(0, 42, 42, 424242)
)
else:
Expand Down Expand Up @@ -441,7 +441,7 @@ def test_astype_copy(dtype):
],
)
def test_asdatetime(dtype, same):
a = _make_one("date")
a = _make_one("dbdate")
for dt in dtype, np.dtype(dtype) if dtype != "datetime" else dtype:
if same:
b = a.astype(dt, copy=False)
Expand Down Expand Up @@ -480,7 +480,7 @@ def test_astimedelta(dtype):
.astype("timedelta64[ns]" if dtype == "timedelta" else dtype)
)

a = _cls("time")([t, None])
a = _cls("dbtime")([t, None])
b = a.astype(dtype)
np.array_equal(b[:1], expect)
assert pd.isna(b[1]) and str(b[1]) == "NaT"
Expand Down Expand Up @@ -526,7 +526,7 @@ def test_min_max_median(dtype):
if pandas_release >= (1, 2):
assert (
a.median() == datetime.time(1, 2, 4)
if dtype == "time"
if dtype == "dbtime"
else datetime.date(2021, 2, 3)
)

Expand All @@ -553,14 +553,14 @@ def test_min_max_median(dtype):
if pandas_release >= (1, 2):
assert (
a.median() == datetime.time(1, 2, 2, 750000)
if dtype == "time"
if dtype == "dbtime"
else datetime.date(2021, 2, 2)
)


def test_date_add():
dates = _cls("date")(SAMPLE_VALUES["date"])
times = _cls("time")(SAMPLE_VALUES["time"])
dates = _cls("dbdate")(SAMPLE_VALUES["dbdate"])
times = _cls("dbtime")(SAMPLE_VALUES["dbtime"])
expect = dates.astype("datetime64") + times.astype("timedelta64")

assert np.array_equal(dates + times, expect)
Expand Down Expand Up @@ -592,8 +592,8 @@ def test_date_add():


def test_date_sub():
dates = _cls("date")(SAMPLE_VALUES["date"])
dates2 = _cls("date")(
dates = _cls("dbdate")(SAMPLE_VALUES["dbdate"])
dates2 = _cls("dbdate")(
(
datetime.date(2021, 1, 2),
datetime.date(2021, 1, 3),
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_time.py
Expand Up @@ -61,7 +61,7 @@
],
)
def test_time_parsing(value, expected):
assert pandas.Series([value], dtype="time")[0] == expected
assert pandas.Series([value], dtype="dbtime")[0] == expected


@pytest.mark.parametrize(
Expand All @@ -81,4 +81,4 @@ def test_time_parsing(value, expected):
)
def test_time_parsing_errors(value, error):
with pytest.raises(ValueError, match=error):
pandas.Series([value], dtype="time")
pandas.Series([value], dtype="dbtime")

0 comments on commit 50ea0f7

Please sign in to comment.