Skip to content

Commit

Permalink
feat: add support for INTERVAL data type to list_rows (#840)
Browse files Browse the repository at this point in the history
* test: refactor `list_rows` tests and add test for scalars

* WIP: INTERVAL support

* feat: add support for INTERVAL data type to `list_rows`

* fix relativedelta construction for non-microseconds

* WIP: support INTERVAL query params

* remove dead code

* INTERVAL not supported in query parameters

* revert query parameter changes

* add validation error for interval

* add unit tests for extreme intervals

* add dateutil to intersphinx

* use dictionary for intersphinx

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* add test case for trailing `.` (seconds followed by a decimal point but no fraction digits)

* explicit none

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* truncate nanoseconds to microseconds

* use \d group for digits

* use \d for consistency

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Peter Lamut <plamut@users.noreply.github.com>
  • Loading branch information
3 people committed Oct 26, 2021
1 parent 1f864fd commit e37380a
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 13 deletions.
3 changes: 2 additions & 1 deletion docs/conf.py
Expand Up @@ -366,8 +366,9 @@
"grpc": ("https://grpc.github.io/grpc/python/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
"pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
"geopandas": ("https://geopandas.org/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/dev", None),
}


Expand Down
47 changes: 46 additions & 1 deletion google/cloud/bigquery/_helpers.py
Expand Up @@ -19,8 +19,9 @@
import decimal
import math
import re
from typing import Any, Union
from typing import Any, Optional, Union

from dateutil import relativedelta
from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
from google.cloud._helpers import _datetime_from_microseconds
Expand All @@ -45,6 +46,14 @@
re.VERBOSE,
)

# BigQuery sends INTERVAL data in "canonical format"
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
#
# The pattern matches three space-separated sections, for example
# "-32-11 -45 -67:16:23.987654":
#   * "[-]years-months": a single optional leading "-" is captured separately
#     as ``calendar_sign``.
#   * "[-]days": the optional "-" is captured as part of ``days`` itself.
#   * "[-]hours:minutes:seconds[.fraction]": a single optional leading "-" is
#     captured separately as ``time_sign``.
# The "." and the fraction digits are each optional, so a bare trailing "."
# and fractions of any length both match.
_INTERVAL_PATTERN = re.compile(
    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
    r"(?P<days>-?\d+) "
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)

_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
Expand Down Expand Up @@ -191,6 +200,41 @@ def _int_from_json(value, field):
return int(value)


def _interval_from_json(
    value: Optional[str], field
) -> Optional[relativedelta.relativedelta]:
    """Convert a canonical-format INTERVAL string into a ``relativedelta``.

    Args:
        value: The interval text to parse, or ``None``.
        field: The schema field describing the column; it is forwarded to
            ``_not_null`` together with ``value`` to decide whether a
            missing value is acceptable.

    Returns:
        ``None`` when ``_not_null(value, field)`` is false, otherwise a
        ``relativedelta`` holding the parsed components.

    Raises:
        TypeError: If ``value`` is ``None`` but ``_not_null`` did not allow
            the early ``None`` return.
        ValueError: If ``value`` does not match ``_INTERVAL_PATTERN``.
    """
    if not _not_null(value, field):
        return None
    if value is None:
        raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")

    match = _INTERVAL_PATTERN.match(value)
    if match is None:
        raise ValueError(f"got interval: '{value}' with unexpected format")

    parts = match.groupdict()

    # One sign covers the whole "years-months" section and another covers the
    # whole "hours:minutes:seconds.fraction" section; days carry their own.
    calendar_sign = -1 if parts["calendar_sign"] == "-" else 1
    time_sign = -1 if parts["time_sign"] == "-" else 1

    fraction = parts["fraction"]
    if fraction:
        # Keep at most six digits (drops anything finer than microseconds),
        # then right-pad with zeros so e.g. "1" is read as 100000.
        microseconds = time_sign * int(fraction[:6].ljust(6, "0"))
    else:
        microseconds = 0

    return relativedelta.relativedelta(
        years=calendar_sign * int(parts["years"]),
        months=calendar_sign * int(parts["months"]),
        days=int(parts["days"]),
        hours=time_sign * int(parts["hours"]),
        minutes=time_sign * int(parts["minutes"]),
        seconds=time_sign * int(parts["seconds"]),
        microseconds=microseconds,
    )


def _float_from_json(value, field):
"""Coerce 'value' to a float, if set or not nullable."""
if _not_null(value, field):
Expand Down Expand Up @@ -327,6 +371,7 @@ def _record_from_json(value, field):
_CELLDATA_FROM_JSON = {
"INTEGER": _int_from_json,
"INT64": _int_from_json,
"INTERVAL": _interval_from_json,
"FLOAT": _float_from_json,
"FLOAT64": _float_from_json,
"NUMERIC": _decimal_from_json,
Expand Down
1 change: 1 addition & 0 deletions google/cloud/bigquery/enums.py
Expand Up @@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum):
DATE = "DATE"
TIME = "TIME"
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types


class SqlParameterScalarTypes:
Expand Down
7 changes: 2 additions & 5 deletions owlbot.py
Expand Up @@ -98,8 +98,9 @@
microgenerator=True,
split_system_tests=True,
intersphinx_dependencies={
"pandas": "http://pandas.pydata.org/pandas-docs/stable/",
"dateutil": "https://dateutil.readthedocs.io/en/latest/",
"geopandas": "https://geopandas.org/",
"pandas": "https://pandas.pydata.org/pandas-docs/dev",
},
)

Expand All @@ -115,10 +116,6 @@
# Include custom SNIPPETS_TESTS job for performance.
# https://github.com/googleapis/python-bigquery/issues/191
".kokoro/presubmit/presubmit.cfg",
# Group all renovate PRs together. If this works well, remove this and
# update the shared templates (possibly with configuration option to
# py_library.)
"renovate.json",
],
)

Expand Down
5 changes: 4 additions & 1 deletion renovate.json
@@ -1,6 +1,9 @@
{
"extends": [
"config:base", "group:all", ":preserveSemverRanges"
"config:base",
"group:all",
":preserveSemverRanges",
":disableDependencyDashboard"
],
"ignorePaths": [".pre-commit-config.yaml"],
"pip_requirements": {
Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -42,6 +42,7 @@
"google-resumable-media >= 0.6.0, < 3.0dev",
"packaging >= 14.3",
"protobuf >= 3.12.0",
"python-dateutil >= 2.7.2, <3.0dev",
"requests >= 2.18.0, < 3.0.0dev",
]
extras = {
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.6.txt
Expand Up @@ -18,6 +18,7 @@ pandas==0.24.2
proto-plus==1.10.0
protobuf==3.12.0
pyarrow==3.0.0
python-dateutil==2.7.2
requests==2.18.0
Shapely==1.6.0
six==1.13.0
Expand Down
5 changes: 0 additions & 5 deletions tests/system/test_client.py
Expand Up @@ -37,11 +37,6 @@
except ImportError: # pragma: NO COVER
bigquery_storage = None

try:
import fastavro # to parse BQ storage client results
except ImportError: # pragma: NO COVER
fastavro = None

try:
import pyarrow
import pyarrow.types
Expand Down
8 changes: 8 additions & 0 deletions tests/system/test_list_rows.py
Expand Up @@ -15,6 +15,8 @@
import datetime
import decimal

from dateutil import relativedelta

from google.cloud import bigquery
from google.cloud.bigquery import enums

Expand Down Expand Up @@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str)
assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
assert row["int64_col"] == 123456789
assert row["interval_col"] == relativedelta.relativedelta(
years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456
)
assert row["numeric_col"] == decimal.Decimal("1.23456789")
assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
assert row["float64_col"] == 1.25
Expand Down Expand Up @@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme(
assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
assert row["geography_col"] == "POINT(-135 90)"
assert row["int64_col"] == 9223372036854775807
assert row["interval_col"] == relativedelta.relativedelta(
years=-10000, days=-3660000, hours=-87840000
)
assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
assert row["float64_col"] == float("Inf")
Expand Down
157 changes: 157 additions & 0 deletions tests/unit/helpers/test_from_json.py
@@ -0,0 +1,157 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dateutil.relativedelta import relativedelta
import pytest

from google.cloud.bigquery.schema import SchemaField


def create_field(mode="NULLABLE", type_="IGNORED"):
    """Build a ``SchemaField`` named "test_field" with the given type and mode."""
    field_name = "test_field"
    return SchemaField(field_name, type_, mode=mode)


@pytest.fixture
def mut():
    """Return the module under test, importing it only when a test asks for it."""
    import google.cloud.bigquery._helpers as module_under_test

    return module_under_test


def test_interval_from_json_w_none_nullable(mut):
    """``None`` with a default (NULLABLE) field comes back as ``None``."""
    nullable_field = create_field()
    assert mut._interval_from_json(None, nullable_field) is None


def test_interval_from_json_w_none_required(mut):
    """``None`` with a REQUIRED field raises ``TypeError``."""
    required_field = create_field(mode="REQUIRED")
    with pytest.raises(TypeError):
        mut._interval_from_json(None, required_field)


def test_interval_from_json_w_invalid_format(mut):
    """Unparseable text raises ``ValueError`` whose message echoes the input."""
    nullable_field = create_field()
    with pytest.raises(ValueError, match="NOT_AN_INTERVAL"):
        mut._interval_from_json("NOT_AN_INTERVAL", nullable_field)


# Each case pairs an INTERVAL string in the "Y-M D H:M:S[.F]" layout with the
# relativedelta that ``_interval_from_json`` is expected to return for it.
@pytest.mark.parametrize(
    ("value", "expected"),
    (
        ("0-0 0 0:0:0", relativedelta()),
        # SELECT INTERVAL X YEAR
        ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
        ("-1-0 0 0:0:0", relativedelta(years=-1)),
        ("1-0 0 0:0:0", relativedelta(years=1)),
        ("10000-0 0 0:0:0", relativedelta(years=10000)),
        # SELECT INTERVAL X MONTH
        ("-0-11 0 0:0:0", relativedelta(months=-11)),
        ("-0-1 0 0:0:0", relativedelta(months=-1)),
        ("0-1 0 0:0:0", relativedelta(months=1)),
        ("0-11 0 0:0:0", relativedelta(months=11)),
        # SELECT INTERVAL X DAY
        ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
        ("0-0 -1 0:0:0", relativedelta(days=-1)),
        ("0-0 1 0:0:0", relativedelta(days=1)),
        ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
        # SELECT INTERVAL X HOUR
        ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
        ("0-0 0 -1:0:0", relativedelta(hours=-1)),
        ("0-0 0 1:0:0", relativedelta(hours=1)),
        ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
        # SELECT INTERVAL X MINUTE
        ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
        ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
        ("0-0 0 0:1:0", relativedelta(minutes=1)),
        ("0-0 0 0:59:0", relativedelta(minutes=59)),
        # SELECT INTERVAL X SECOND
        ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
        ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
        ("0-0 0 0:0:1", relativedelta(seconds=1)),
        ("0-0 0 0:0:59", relativedelta(seconds=59)),
        # SELECT (INTERVAL -1 SECOND) / 1000000
        ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
        ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
        ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
        ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
        ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
        # Test with multiple digits in each section.
        (
            "32-11 45 67:16:23.987654",
            relativedelta(
                years=32,
                months=11,
                days=45,
                hours=67,
                minutes=16,
                seconds=23,
                microseconds=987654,
            ),
        ),
        # A leading "-" on the calendar and time sections negates every
        # component of that section, not just the first number.
        (
            "-32-11 -45 -67:16:23.987654",
            relativedelta(
                years=-32,
                months=-11,
                days=-45,
                hours=-67,
                minutes=-16,
                seconds=-23,
                microseconds=-987654,
            ),
        ),
        # Test with mixed +/- sections.
        (
            "9999-9 -999999 9999999:59:59.999999",
            relativedelta(
                years=9999,
                months=9,
                days=-999999,
                hours=9999999,
                minutes=59,
                seconds=59,
                microseconds=999999,
            ),
        ),
        # Test with fraction that is not microseconds.
        ("0-0 0 0:0:42.", relativedelta(seconds=42)),
        ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
        ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
        ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
        ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
        # Fractional seconds can cause rounding problems if cast to float. See:
        # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
        ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
        (
            "0-0 0 01:01:01.010101",
            relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
        ),
        (
            "0-0 0 09:09:09.090909",
            relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
        ),
        (
            "0-0 0 11:11:11.111111",
            relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
        ),
        (
            "0-0 0 19:16:23.987654",
            relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
        ),
        # Nanoseconds are not expected, but should not cause error.
        # Digits beyond the sixth are expected to be dropped, not rounded.
        ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
        ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
    ),
)
def test_w_string_values(mut, value, expected):
    """Parse ``value`` with a default (NULLABLE) field and compare to ``expected``."""
    got = mut._interval_from_json(value, create_field())
    assert got == expected

0 comments on commit e37380a

Please sign in to comment.