From e37380a959cbd5bb9cbbf6807f0a8ea147e0a713 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 26 Oct 2021 11:04:13 -0500 Subject: [PATCH] feat: add support for INTERVAL data type to `list_rows` (#840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test: refactor `list_rows` tests and add test for scalars * WIP: INTERVAL support * feat: add support for INTERVAL data type to `list_rows` * fix relativedelta construction for non-microseconds * WIP: support INTERVAL query params * remove dead code * INTERVAL not supported in query parameters * revert query parameter changes * add validation error for interval * add unit tests for extreme intervals * add dateutil to intersphinx * use dictionary for intersphinx * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add test case for trailing . 
* explicit none * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * truncate nanoseconds * use \d group for digits * use \d for consistency Co-authored-by: Owl Bot Co-authored-by: Peter Lamut --- docs/conf.py | 3 +- google/cloud/bigquery/_helpers.py | 47 +++++++- google/cloud/bigquery/enums.py | 1 + owlbot.py | 7 +- renovate.json | 5 +- setup.py | 1 + testing/constraints-3.6.txt | 1 + tests/system/test_client.py | 5 - tests/system/test_list_rows.py | 8 ++ tests/unit/helpers/test_from_json.py | 157 +++++++++++++++++++++++++++ 10 files changed, 222 insertions(+), 13 deletions(-) create mode 100644 tests/unit/helpers/test_from_json.py diff --git a/docs/conf.py b/docs/conf.py index 329951636..0784da0b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,8 +366,9 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), "geopandas": ("https://geopandas.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), } diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d7189d322..e95d38545 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,8 +19,9 @@ import decimal import math import re -from typing import Any, Union +from typing import Any, Optional, Union +from dateutil import relativedelta from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -45,6 +46,14 @@ re.VERBOSE, ) +# BigQuery sends INTERVAL data in "canonical format" +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type 
+_INTERVAL_PATTERN = re.compile( + r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) " + r"(?P<days>-?\d+) " + r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$" +) + _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -191,6 +200,41 @@ def _int_from_json(value, field): return int(value) +def _interval_from_json( + value: Optional[str], field +) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return None + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError(f"got interval: '{value}' with unexpected format") + + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) + + def _float_from_json(value, field): """Coerce 'value' to a float, if set or not nullable.""" if _not_null(value, field): @@ -327,6 +371,7 @@ def _record_from_json(value, field): _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, "INT64": _int_from_json, + "INTERVAL": _interval_from_json, "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, diff --git a/google/cloud/bigquery/enums.py 
b/google/cloud/bigquery/enums.py index d67cebd4c..0eaaffd2e 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + INTERVAL = "INTERVAL" # NOTE: not available in legacy types class SqlParameterScalarTypes: diff --git a/owlbot.py b/owlbot.py index 0f6f8fe99..f2f8bea54 100644 --- a/owlbot.py +++ b/owlbot.py @@ -98,8 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/stable/", + "dateutil": "https://dateutil.readthedocs.io/en/latest/", "geopandas": "https://geopandas.org/", + "pandas": "https://pandas.pydata.org/pandas-docs/dev", }, ) @@ -115,10 +116,6 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - # Group all renovate PRs together. If this works well, remove this and - # update the shared templates (possibly with configuration option to - # py_library.) 
- "renovate.json", ], ) diff --git a/renovate.json b/renovate.json index 713c60bb4..c21036d38 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,9 @@ { "extends": [ - "config:base", "group:all", ":preserveSemverRanges" + "config:base", + "group:all", + ":preserveSemverRanges", + ":disableDependencyDashboard" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { diff --git a/setup.py b/setup.py index e7515493d..eb8066abc 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 23d2724f7..59913d588 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -18,6 +18,7 @@ pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 +python-dateutil==2.7.2 requests==2.18.0 Shapely==1.6.0 six==1.13.0 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f6f95c184..91bcff155 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -37,11 +37,6 @@ except ImportError: # pragma: NO COVER bigquery_storage = None -try: - import fastavro # to parse BQ storage client results -except ImportError: # pragma: NO COVER - fastavro = None - try: import pyarrow import pyarrow.types diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py index 70388059e..4c08958c3 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -15,6 +15,8 @@ import datetime import decimal +from dateutil import relativedelta + from google.cloud import bigquery from google.cloud.bigquery import enums @@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str) assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" 
assert row["int64_col"] == 123456789 + assert row["interval_col"] == relativedelta.relativedelta( + years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456 + ) assert row["numeric_col"] == decimal.Decimal("1.23456789") assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") assert row["float64_col"] == 1.25 @@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme( assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) assert row["geography_col"] == "POINT(-135 90)" assert row["int64_col"] == 9223372036854775807 + assert row["interval_col"] == relativedelta.relativedelta( + years=-10000, days=-3660000, hours=-87840000 + ) assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") assert row["float64_col"] == float("Inf") diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py new file mode 100644 index 000000000..65b054f44 --- /dev/null +++ b/tests/unit/helpers/test_from_json.py @@ -0,0 +1,157 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dateutil.relativedelta import relativedelta +import pytest + +from google.cloud.bigquery.schema import SchemaField + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +def test_interval_from_json_w_none_nullable(mut): + got = mut._interval_from_json(None, create_field()) + assert got is None + + +def test_interval_from_json_w_none_required(mut): + with pytest.raises(TypeError): + mut._interval_from_json(None, create_field(mode="REQUIRED")) + + +def test_interval_from_json_w_invalid_format(mut): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + mut._interval_from_json("NOT_AN_INTERVAL", create_field()) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), + ("-1-0 0 0:0:0", relativedelta(years=-1)), + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), + ("-0-1 0 0:0:0", relativedelta(months=-1)), + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), + ("0-0 -1 0:0:0", relativedelta(days=-1)), + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT 
INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), + # Test with multiple digits in each section. + ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. 
See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. + ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), + ), +) +def test_w_string_values(mut, value, expected): + got = mut._interval_from_json(value, create_field()) + assert got == expected