Skip to content

Commit

Permalink
fix: avoid rounding problems with microseconds (#20)
Browse files Browse the repository at this point in the history
* fix: avoid rounding problems with microseconds

* add more tests

* adjust regex to parse fraction as integer
  • Loading branch information
tswast committed Oct 4, 2021
1 parent de3bb56 commit 0ff7371
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 59 deletions.
26 changes: 15 additions & 11 deletions db_dtypes/__init__.py
Expand Up @@ -69,29 +69,33 @@ def _datetime(
cls,
scalar,
match_fn=re.compile(
r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$"
r"\s*(?P<hours>\d+)"
r"(?::(?P<minutes>\d+)"
r"(?::(?P<seconds>\d+)"
r"(?:\.(?P<fraction>\d*))?)?)?\s*$"
).match,
):
if isinstance(scalar, datetime.time):
return datetime.datetime.combine(cls._epoch, scalar)
elif isinstance(scalar, str):
# iso string
match = match_fn(scalar)
if not match:
parsed = match_fn(scalar)
if not parsed:
raise ValueError(f"Bad time string: {repr(scalar)}")

hour = match.group("hour")
minute = match.group("minute")
second = match.group("second")
second, microsecond = divmod(float(second if second else 0), 1)
hours = parsed.group("hours")
minutes = parsed.group("minutes")
seconds = parsed.group("seconds")
fraction = parsed.group("fraction")
microseconds = int(fraction.ljust(6, "0")[:6]) if fraction else 0
return datetime.datetime(
1970,
1,
1,
int(hour),
int(minute if minute else 0),
int(second),
int(microsecond * 1_000_000),
int(hours),
int(minutes) if minutes else 0,
int(seconds) if seconds else 0,
microseconds,
)
else:
raise TypeError("Invalid value type", scalar)
Expand Down
62 changes: 62 additions & 0 deletions tests/unit/test_date.py
@@ -0,0 +1,62 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime

import pandas
import pytest

# To register the types.
import db_dtypes # noqa


@pytest.mark.parametrize(
    "value, expected",
    [
        # Lower and upper bounds listed for pandas.Timestamp.
        ("1677-09-22", datetime.date(1677, 9, 22)),
        ("2262-04-11", datetime.date(2262, 4, 11)),
        # Dates commonly used as a "zero" / epoch value.
        ("1900-01-01", datetime.date(1900, 1, 1)),
        ("1970-01-01", datetime.date(1970, 1, 1)),
        # A mix of other dates, including a leap day.
        ("1993-10-31", datetime.date(1993, 10, 31)),
        ("2012-02-29", datetime.date(2012, 2, 29)),
        ("2021-12-17", datetime.date(2021, 12, 17)),
        ("2038-01-19", datetime.date(2038, 1, 19)),
    ],
)
def test_date_parsing(value, expected):
    """A YYYY-MM-DD string stored in a "date" Series reads back as a date.

    Builds a one-element ``pandas.Series`` with ``dtype="date"`` from the
    string and checks that its first element equals the expected
    ``datetime.date``.
    """
    parsed = pandas.Series([value], dtype="date")
    first_element = parsed[0]
    assert first_element == expected


@pytest.mark.parametrize(
    "value, error",
    [
        # Strings that do not have the year-month-day shape at all.
        ("thursday", "Bad date string: 'thursday'"),
        ("1-2-thursday", "Bad date string: '1-2-thursday'"),
        ("1-2-3-4", "Bad date string: '1-2-3-4'"),
        ("1-2-3.f", "Bad date string: '1-2-3.f'"),
        ("1-d-3", "Bad date string: '1-d-3'"),
        ("1-3", "Bad date string: '1-3'"),
        ("1", "Bad date string: '1'"),
        ("", "Bad date string: ''"),
        # Three numeric fields, but one of them is out of range.
        ("2021-2-99", "day is out of range for month"),
        ("2021-99-1", "month must be in 1[.][.]12"),
        ("10000-1-1", "year 10000 is out of range"),
    ],
)
def test_date_parsing_errors(value, error):
    """An unparseable date string raises ValueError with the given message.

    ``error`` is a regular expression searched for in the text of the
    raised exception.
    """
    with pytest.raises(ValueError) as caught:
        pandas.Series([value], dtype="date")
    # Same regex search that ``pytest.raises(..., match=error)`` performs.
    caught.match(error)
48 changes: 0 additions & 48 deletions tests/unit/test_dtypes.py
Expand Up @@ -621,51 +621,3 @@ def test_date_sub():
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
expect = dates.astype("object") - do
assert np.array_equal(dates - do, expect)


@pytest.mark.parametrize(
    "value, expected",
    [
        # Hour only.
        ("1", datetime.time(1)),
        # Hour and minute.
        ("1:2", datetime.time(1, 2)),
    ],
)
def test_short_time_parsing(value, expected):
    """Time strings that omit trailing fields still parse to a time."""
    time_array = _cls("time")([value])
    first_element = time_array[0]
    assert first_element == expected


@pytest.mark.parametrize(
    "value, error",
    [
        # Strings that do not have the hour[:minute[:second]] shape.
        ("thursday", "Bad time string: 'thursday'"),
        ("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
        ("1:2:3:4", "Bad time string: '1:2:3:4'"),
        ("1:2:3.f", "Bad time string: '1:2:3.f'"),
        ("1:d:3", "Bad time string: '1:d:3'"),
        ("1:2.3", "Bad time string: '1:2.3'"),
        ("", "Bad time string: ''"),
        # Numeric fields, but one of them is out of range.
        ("1:2:99", "second must be in 0[.][.]59"),
        ("1:99", "minute must be in 0[.][.]59"),
        ("99", "hour must be in 0[.][.]23"),
    ],
)
def test_bad_time_parsing(value, error):
    """An unparseable time string raises ValueError with the given message.

    ``error`` is a regular expression searched for in the text of the
    raised exception.
    """
    with pytest.raises(ValueError) as caught:
        _cls("time")([value])
    # Same regex search that ``pytest.raises(..., match=error)`` performs.
    caught.match(error)


@pytest.mark.parametrize(
    "value, error",
    [
        # Strings that do not have the year-month-day shape at all.
        ("thursday", "Bad date string: 'thursday'"),
        ("1-2-thursday", "Bad date string: '1-2-thursday'"),
        ("1-2-3-4", "Bad date string: '1-2-3-4'"),
        ("1-2-3.f", "Bad date string: '1-2-3.f'"),
        ("1-d-3", "Bad date string: '1-d-3'"),
        ("1-3", "Bad date string: '1-3'"),
        ("1", "Bad date string: '1'"),
        ("", "Bad date string: ''"),
        # Three numeric fields, but one of them is out of range.
        ("2021-2-99", "day is out of range for month"),
        ("2021-99-1", "month must be in 1[.][.]12"),
        ("10000-1-1", "year 10000 is out of range"),
    ],
)
def test_bad_date_parsing(value, error):
    """An unparseable date string raises ValueError with the given message.

    ``error`` is a regular expression searched for in the text of the
    raised exception.
    """
    with pytest.raises(ValueError) as caught:
        _cls("date")([value])
    # Same regex search that ``pytest.raises(..., match=error)`` performs.
    caught.match(error)
84 changes: 84 additions & 0 deletions tests/unit/test_time.py
@@ -0,0 +1,84 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime

import pandas
import pytest

# To register the types.
import db_dtypes # noqa


@pytest.mark.parametrize(
    "value, expected",
    [
        # Every accepted spelling of midnight, including an empty fraction
        # and surrounding whitespace.
        ("0", datetime.time(0)),
        ("0:0", datetime.time(0)),
        ("0:0:0", datetime.time(0)),
        ("0:0:0.", datetime.time(0)),
        ("0:0:0.0", datetime.time(0)),
        ("0:0:0.000000", datetime.time(0)),
        ("00:00:00", datetime.time(0, 0, 0)),
        (" 00:00:00 ", datetime.time(0, 0, 0)),
        # Strings that leave out trailing fields or zero padding.
        ("1", datetime.time(1)),
        ("23", datetime.time(23)),
        ("1:2", datetime.time(1, 2)),
        ("23:59", datetime.time(23, 59)),
        ("1:2:3", datetime.time(1, 2, 3)),
        ("23:59:59", datetime.time(23, 59, 59)),
        # Zero-padded fields using 8 and 9, which are not octal digits.
        ("08:08:08", datetime.time(8, 8, 8)),
        ("09:09:09", datetime.time(9, 9, 9)),
        # Fractions that are prone to rounding errors when the seconds are
        # converted through float. See:
        # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
        ("0:0:59.876543", datetime.time(0, 0, 59, 876543)),
        ("01:01:01.010101", datetime.time(1, 1, 1, 10101)),
        ("09:09:09.090909", datetime.time(9, 9, 9, 90909)),
        ("11:11:11.111111", datetime.time(11, 11, 11, 111111)),
        ("19:16:23.987654", datetime.time(19, 16, 23, 987654)),
        # Smallest and largest microsecond values.
        ("00:00:00.000001", datetime.time(0, 0, 0, 1)),
        ("23:59:59.999999", datetime.time(23, 59, 59, 999_999)),
        # Digits beyond the sixth fractional place are expected to be
        # dropped (truncated, not rounded).
        # TODO: Support nanosecond precision values without truncation.
        # https://github.com/googleapis/python-db-dtypes-pandas/issues/19
        ("0:0:0.000001001", datetime.time(0, 0, 0, 1)),
        ("23:59:59.999999000", datetime.time(23, 59, 59, 999_999)),
        ("23:59:59.999999999", datetime.time(23, 59, 59, 999_999)),
    ],
)
def test_time_parsing(value, expected):
    """A time string stored in a "time" Series reads back as a time.

    Builds a one-element ``pandas.Series`` with ``dtype="time"`` from the
    string and checks that its first element equals the expected
    ``datetime.time``.
    """
    parsed = pandas.Series([value], dtype="time")
    first_element = parsed[0]
    assert first_element == expected


@pytest.mark.parametrize(
    "value, error",
    [
        # Strings that do not have the hour[:minute[:second]] shape.
        ("thursday", "Bad time string: 'thursday'"),
        ("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
        ("1:2:3:4", "Bad time string: '1:2:3:4'"),
        ("1:2:3.f", "Bad time string: '1:2:3.f'"),
        ("1:d:3", "Bad time string: '1:d:3'"),
        ("1:2.3", "Bad time string: '1:2.3'"),
        ("", "Bad time string: ''"),
        # Numeric fields, but one of them is out of range.
        ("1:2:99", "second must be in 0[.][.]59"),
        ("1:99", "minute must be in 0[.][.]59"),
        ("99", "hour must be in 0[.][.]23"),
    ],
)
def test_time_parsing_errors(value, error):
    """An unparseable time string raises ValueError with the given message.

    ``error`` is a regular expression searched for in the text of the
    raised exception.
    """
    with pytest.raises(ValueError) as caught:
        pandas.Series([value], dtype="time")
    # Same regex search that ``pytest.raises(..., match=error)`` performs.
    caught.match(error)

0 comments on commit 0ff7371

Please sign in to comment.