Skip to content

Commit

Permalink
feat: add support for decimal target types (#735)
Browse files Browse the repository at this point in the history
* feat: add support for decimal target types

* Add decimal target types support to ExternalConfig

* Remove ambiguous parts of DecimalTargetType docs.
  • Loading branch information
plamut committed Jul 10, 2021
1 parent cd2f09e commit 7d2d3e9
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 0 deletions.
2 changes: 2 additions & 0 deletions google/cloud/bigquery/__init__.py
Expand Up @@ -38,6 +38,7 @@
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import AutoRowIDs
from google.cloud.bigquery.enums import DecimalTargetType
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlDataTypes
Expand Down Expand Up @@ -148,6 +149,7 @@
"AutoRowIDs",
"Compression",
"CreateDisposition",
"DecimalTargetType",
"DestinationFormat",
"DeterminismLevel",
"ExternalSourceFormat",
Expand Down
18 changes: 18 additions & 0 deletions google/cloud/bigquery/enums.py
Expand Up @@ -49,6 +49,24 @@ class Compression(object):
"""Specifies no compression."""


class DecimalTargetType:
"""The data types that could be used as a target type when converting decimal values.
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType
.. versionadded:: 2.21.0
"""

NUMERIC = "NUMERIC"
"""Decimal values could be converted to NUMERIC type."""

BIGNUMERIC = "BIGNUMERIC"
"""Decimal values could be converted to BIGNUMERIC type."""

STRING = "STRING"
"""Decimal values could be converted to STRING type."""


class CreateDisposition(object):
"""Specifies whether the job is allowed to create new tables. The default
value is :attr:`CREATE_IF_NEEDED`.
Expand Down
23 changes: 23 additions & 0 deletions google/cloud/bigquery/external_config.py
Expand Up @@ -22,6 +22,7 @@

import base64
import copy
from typing import FrozenSet, Iterable, Optional

from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
Expand Down Expand Up @@ -693,6 +694,28 @@ def compression(self):
def compression(self, value):
self._properties["compression"] = value

@property
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
"""Possible SQL data types to which the source decimal values are converted.
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types
.. versionadded:: 2.21.0
"""
prop = self._properties.get("decimalTargetTypes")
if prop is not None:
prop = frozenset(prop)
return prop

@decimal_target_types.setter
def decimal_target_types(self, value: Optional[Iterable[str]]):
if value is not None:
self._properties["decimalTargetTypes"] = list(value)
else:
if "decimalTargetTypes" in self._properties:
del self._properties["decimalTargetTypes"]

@property
def hive_partitioning(self):
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
Expand Down
23 changes: 23 additions & 0 deletions google/cloud/bigquery/job/load.py
Expand Up @@ -14,6 +14,8 @@

"""Classes for load jobs."""

from typing import FrozenSet, Iterable, Optional

from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery.format_options import ParquetOptions
Expand Down Expand Up @@ -121,6 +123,27 @@ def create_disposition(self):
def create_disposition(self, value):
self._set_sub_prop("createDisposition", value)

@property
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
"""Possible SQL data types to which the source decimal values are converted.
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types
.. versionadded:: 2.21.0
"""
prop = self._get_sub_prop("decimalTargetTypes")
if prop is not None:
prop = frozenset(prop)
return prop

@decimal_target_types.setter
def decimal_target_types(self, value: Optional[Iterable[str]]):
if value is not None:
self._set_sub_prop("decimalTargetTypes", list(value))
else:
self._del_sub_prop("decimalTargetTypes")

@property
def destination_encryption_configuration(self):
"""Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
Expand Down
Binary file added tests/data/numeric_38_12.parquet
Binary file not shown.
54 changes: 54 additions & 0 deletions tests/system/test_client.py
Expand Up @@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)
)

def test_load_table_from_local_parquet_file_decimal_types(self):
from google.cloud.bigquery.enums import DecimalTargetType
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import WriteDisposition

TABLE_NAME = "test_table_parquet"

expected_rows = [
(decimal.Decimal("123.999999999999"),),
(decimal.Decimal("99999999999999999999999999.999999999999"),),
]

dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump"))
table_ref = dataset.table(TABLE_NAME)
table = Table(table_ref)
self.to_delete.insert(0, table)

job_config = bigquery.LoadJobConfig()
job_config.source_format = SourceFormat.PARQUET
job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
job_config.decimal_target_types = [
DecimalTargetType.NUMERIC,
DecimalTargetType.BIGNUMERIC,
DecimalTargetType.STRING,
]

with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
job = Config.CLIENT.load_table_from_file(
parquet_file, table_ref, job_config=job_config
)

job.result(timeout=JOB_TIMEOUT) # Retry until done.

self.assertEqual(job.output_rows, len(expected_rows))

table = Config.CLIENT.get_table(table)
rows = self._fetch_single_page(table)
row_tuples = [r.values() for r in rows]
self.assertEqual(sorted(row_tuples), sorted(expected_rows))

# Forcing the NUMERIC type, however, should result in an error.
job_config.decimal_target_types = [DecimalTargetType.NUMERIC]

with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
job = Config.CLIENT.load_table_from_file(
parquet_file, table_ref, job_config=job_config
)

with self.assertRaises(BadRequest) as exc_info:
job.result(timeout=JOB_TIMEOUT)

exc_msg = str(exc_info.exception)
self.assertIn("out of valid NUMERIC range", exc_msg)

def test_load_table_from_json_basic_use(self):
table_schema = (
bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/job/test_load_config.py
Expand Up @@ -122,6 +122,45 @@ def test_create_disposition_setter(self):
config.create_disposition = disposition
self.assertEqual(config._properties["load"]["createDisposition"], disposition)

def test_decimal_target_types_miss(self):
config = self._get_target_class()()
self.assertIsNone(config.decimal_target_types)

def test_decimal_target_types_hit(self):
from google.cloud.bigquery.enums import DecimalTargetType

config = self._get_target_class()()
decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
config._properties["load"]["decimalTargetTypes"] = decimal_target_types

expected = frozenset(decimal_target_types)
self.assertEqual(config.decimal_target_types, expected)

def test_decimal_target_types_setter(self):
from google.cloud.bigquery.enums import DecimalTargetType

decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC)
config = self._get_target_class()()
config.decimal_target_types = decimal_target_types
self.assertEqual(
config._properties["load"]["decimalTargetTypes"],
list(decimal_target_types),
)

def test_decimal_target_types_setter_w_none(self):
from google.cloud.bigquery.enums import DecimalTargetType

config = self._get_target_class()()
decimal_target_types = [DecimalTargetType.BIGNUMERIC]
config._properties["load"]["decimalTargetTypes"] = decimal_target_types

config.decimal_target_types = None

self.assertIsNone(config.decimal_target_types)
self.assertNotIn("decimalTargetTypes", config._properties["load"])

config.decimal_target_types = None # No error if unsetting an unset property.

def test_destination_encryption_configuration_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.destination_encryption_configuration)
Expand Down
58 changes: 58 additions & 0 deletions tests/unit/test_external_config.py
Expand Up @@ -532,6 +532,64 @@ def test_to_api_repr_parquet(self):

self.assertEqual(got_resource, exp_resource)

def test_from_api_repr_decimal_target_types(self):
from google.cloud.bigquery.enums import DecimalTargetType

resource = _copy_and_update(
self.BASE_RESOURCE,
{
"sourceFormat": "FORMAT_FOO",
"decimalTargetTypes": [DecimalTargetType.NUMERIC],
},
)

ec = external_config.ExternalConfig.from_api_repr(resource)

self._verify_base(ec)
self.assertEqual(ec.source_format, "FORMAT_FOO")
self.assertEqual(
ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC])
)

# converting back to API representation should yield the same result
got_resource = ec.to_api_repr()
self.assertEqual(got_resource, resource)

del resource["decimalTargetTypes"]
ec = external_config.ExternalConfig.from_api_repr(resource)
self.assertIsNone(ec.decimal_target_types)

got_resource = ec.to_api_repr()
self.assertEqual(got_resource, resource)

def test_to_api_repr_decimal_target_types(self):
from google.cloud.bigquery.enums import DecimalTargetType

ec = external_config.ExternalConfig("FORMAT_FOO")
ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]

got_resource = ec.to_api_repr()

expected_resource = {
"sourceFormat": "FORMAT_FOO",
"decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING],
}
self.assertEqual(got_resource, expected_resource)

def test_to_api_repr_decimal_target_types_unset(self):
from google.cloud.bigquery.enums import DecimalTargetType

ec = external_config.ExternalConfig("FORMAT_FOO")
ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC]
ec.decimal_target_types = None

got_resource = ec.to_api_repr()

expected_resource = {"sourceFormat": "FORMAT_FOO"}
self.assertEqual(got_resource, expected_resource)

ec.decimal_target_types = None # No error if unsetting when already unset.


def _copy_and_update(d, u):
d = copy.deepcopy(d)
Expand Down

0 comments on commit 7d2d3e9

Please sign in to comment.