diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index dfe3a6320..b97224176 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -38,6 +38,7 @@
 from google.cloud.bigquery.dataset import DatasetReference
 from google.cloud.bigquery import enums
 from google.cloud.bigquery.enums import AutoRowIDs
+from google.cloud.bigquery.enums import DecimalTargetType
 from google.cloud.bigquery.enums import KeyResultStatementKind
 from google.cloud.bigquery.enums import SqlTypeNames
 from google.cloud.bigquery.enums import StandardSqlDataTypes
@@ -148,6 +149,7 @@
     "AutoRowIDs",
     "Compression",
     "CreateDisposition",
+    "DecimalTargetType",
     "DestinationFormat",
     "DeterminismLevel",
     "ExternalSourceFormat",
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index dbbd02635..ef35dffe0 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -49,6 +49,28 @@ class Compression(object):
     """Specifies no compression."""
 
 
+class DecimalTargetType:
+    """SQL type names usable as conversion targets for source decimal values.
+
+    Each attribute is a plain string, so the members can be placed directly
+    in the ``decimalTargetTypes`` list of a configuration resource and
+    compared against values read back from one.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType
+
+    .. versionadded:: 2.21.0
+    """
+
+    NUMERIC = "NUMERIC"
+    """Decimal values could be converted to NUMERIC type."""
+
+    BIGNUMERIC = "BIGNUMERIC"
+    """Decimal values could be converted to BIGNUMERIC type."""
+
+    STRING = "STRING"
+    """Decimal values could be converted to STRING type."""
+
+
 class CreateDisposition(object):
     """Specifies whether the job is allowed to create new tables.
     The default value is :attr:`CREATE_IF_NEEDED`.
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index 0c49d2d76..f1692ba50 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -22,6 +22,7 @@
 
 import base64
 import copy
+from typing import FrozenSet, Iterable, Optional
 
 from google.cloud.bigquery._helpers import _to_bytes
 from google.cloud.bigquery._helpers import _bytes_to_json
@@ -693,6 +694,32 @@ def compression(self):
     def compression(self, value):
         self._properties["compression"] = value
 
+    @property
+    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
+        """Possible SQL data types to which the source decimal values are converted.
+
+        Reads back as a ``frozenset`` of type names, or :data:`None` when the
+        property is not set. Any iterable of type names may be assigned (it is
+        stored as a list); assigning :data:`None` removes the property.
+
+        See:
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types
+
+        .. versionadded:: 2.21.0
+        """
+        prop = self._properties.get("decimalTargetTypes")
+        if prop is not None:
+            prop = frozenset(prop)
+        return prop
+
+    @decimal_target_types.setter
+    def decimal_target_types(self, value: Optional[Iterable[str]]):
+        if value is not None:
+            self._properties["decimalTargetTypes"] = list(value)
+        else:
+            # pop() with a default is a no-op when the key is already absent.
+            self._properties.pop("decimalTargetTypes", None)
+
     @property
     def hive_partitioning(self):
         """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py
index 41d38dd74..bdee5cb6b 100644
--- a/google/cloud/bigquery/job/load.py
+++ b/google/cloud/bigquery/job/load.py
@@ -14,6 +14,8 @@
 
 """Classes for load jobs."""
 
+from typing import FrozenSet, Iterable, Optional
+
 from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
 from google.cloud.bigquery.external_config import HivePartitioningOptions
 from google.cloud.bigquery.format_options import ParquetOptions
@@ -121,6 +123,31 @@ def create_disposition(self):
     def create_disposition(self, value):
         self._set_sub_prop("createDisposition", value)
 
+    @property
+    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
+        """Possible SQL data types to which the source decimal values are converted.
+
+        Reads back as a ``frozenset`` of type names, or :data:`None` when the
+        property is not set. Any iterable of type names may be assigned (it is
+        stored as a list); assigning :data:`None` removes the property.
+
+        See:
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types
+
+        .. versionadded:: 2.21.0
+        """
+        prop = self._get_sub_prop("decimalTargetTypes")
+        if prop is not None:
+            prop = frozenset(prop)
+        return prop
+
+    @decimal_target_types.setter
+    def decimal_target_types(self, value: Optional[Iterable[str]]):
+        if value is not None:
+            self._set_sub_prop("decimalTargetTypes", list(value))
+        else:
+            self._del_sub_prop("decimalTargetTypes")
+
     @property
     def destination_encryption_configuration(self):
         """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
diff --git a/tests/data/numeric_38_12.parquet b/tests/data/numeric_38_12.parquet
new file mode 100644
index 000000000..ef4db91ea
Binary files /dev/null and b/tests/data/numeric_38_12.parquet differ
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index ce3021399..460296b2f 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
             sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)
         )
 
+    def test_load_table_from_local_parquet_file_decimal_types(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+        from google.cloud.bigquery.job import SourceFormat
+        from google.cloud.bigquery.job import WriteDisposition
+
+        TABLE_NAME = "test_table_parquet"
+
+        expected_rows = [
+            (decimal.Decimal("123.999999999999"),),
+            (decimal.Decimal("99999999999999999999999999.999999999999"),),
+        ]
+
+        dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump"))
+        table_ref = dataset.table(TABLE_NAME)
+        table = Table(table_ref)
+        self.to_delete.insert(0, table)
+
+        job_config = bigquery.LoadJobConfig()
+        job_config.source_format = SourceFormat.PARQUET
+        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
+        job_config.decimal_target_types = [
+            DecimalTargetType.NUMERIC,
+            DecimalTargetType.BIGNUMERIC,
+            DecimalTargetType.STRING,
+        ]
+
+        with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
+            job = Config.CLIENT.load_table_from_file(
+                parquet_file, table_ref, job_config=job_config
+            )
+
+        job.result(timeout=JOB_TIMEOUT)  # Retry until done.
+
+        self.assertEqual(job.output_rows, len(expected_rows))
+
+        table = Config.CLIENT.get_table(table)
+        rows = self._fetch_single_page(table)
+        row_tuples = [r.values() for r in rows]
+        self.assertEqual(sorted(row_tuples), sorted(expected_rows))
+
+        # Forcing the NUMERIC type, however, should result in an error.
+        job_config.decimal_target_types = [DecimalTargetType.NUMERIC]
+
+        with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
+            job = Config.CLIENT.load_table_from_file(
+                parquet_file, table_ref, job_config=job_config
+            )
+
+        with self.assertRaises(BadRequest) as exc_info:
+            job.result(timeout=JOB_TIMEOUT)
+
+        exc_msg = str(exc_info.exception)
+        self.assertIn("out of valid NUMERIC range", exc_msg)
+
     def test_load_table_from_json_basic_use(self):
         table_schema = (
             bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py
index eafe7e046..190bd16dc 100644
--- a/tests/unit/job/test_load_config.py
+++ b/tests/unit/job/test_load_config.py
@@ -122,6 +122,45 @@ def test_create_disposition_setter(self):
         config.create_disposition = disposition
         self.assertEqual(config._properties["load"]["createDisposition"], disposition)
 
+    def test_decimal_target_types_miss(self):
+        config = self._get_target_class()()
+        self.assertIsNone(config.decimal_target_types)
+
+    def test_decimal_target_types_hit(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        config = self._get_target_class()()
+        decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
+        config._properties["load"]["decimalTargetTypes"] = decimal_target_types
+
+        expected = frozenset(decimal_target_types)
+        self.assertEqual(config.decimal_target_types, expected)
+
+    def test_decimal_target_types_setter(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC)
+        config = self._get_target_class()()
+        config.decimal_target_types = decimal_target_types
+        self.assertEqual(
+            config._properties["load"]["decimalTargetTypes"],
+            list(decimal_target_types),
+        )
+
+    def test_decimal_target_types_setter_w_none(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        config = self._get_target_class()()
+        decimal_target_types = [DecimalTargetType.BIGNUMERIC]
+        config._properties["load"]["decimalTargetTypes"] = decimal_target_types
+
+        config.decimal_target_types = None
+
+        self.assertIsNone(config.decimal_target_types)
+        self.assertNotIn("decimalTargetTypes", config._properties["load"])
+
+        config.decimal_target_types = None  # No error if unsetting an unset property.
+
     def test_destination_encryption_configuration_missing(self):
         config = self._get_target_class()()
         self.assertIsNone(config.destination_encryption_configuration)
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 393df931e..1f49dba5d 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -532,6 +532,66 @@ def test_to_api_repr_parquet(self):
 
         self.assertEqual(got_resource, exp_resource)
 
+    def test_from_api_repr_decimal_target_types(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        resource = _copy_and_update(
+            self.BASE_RESOURCE,
+            {
+                "sourceFormat": "FORMAT_FOO",
+                "decimalTargetTypes": [DecimalTargetType.NUMERIC],
+            },
+        )
+
+        ec = external_config.ExternalConfig.from_api_repr(resource)
+
+        self._verify_base(ec)
+        self.assertEqual(ec.source_format, "FORMAT_FOO")
+        self.assertEqual(
+            ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC])
+        )
+
+        # converting back to API representation should yield the same result
+        got_resource = ec.to_api_repr()
+        self.assertEqual(got_resource, resource)
+
+        # With the key removed from the resource, the property reads back as None.
+        del resource["decimalTargetTypes"]
+        ec = external_config.ExternalConfig.from_api_repr(resource)
+        self.assertIsNone(ec.decimal_target_types)
+
+        got_resource = ec.to_api_repr()
+        self.assertEqual(got_resource, resource)
+
+    def test_to_api_repr_decimal_target_types(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        ec = external_config.ExternalConfig("FORMAT_FOO")
+        ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
+
+        got_resource = ec.to_api_repr()
+
+        expected_resource = {
+            "sourceFormat": "FORMAT_FOO",
+            "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING],
+        }
+        self.assertEqual(got_resource, expected_resource)
+
+    def test_to_api_repr_decimal_target_types_unset(self):
+        from google.cloud.bigquery.enums import DecimalTargetType
+
+        ec = external_config.ExternalConfig("FORMAT_FOO")
+        # Populate the raw property first so that assigning None has a key to remove.
+        ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC]
+        ec.decimal_target_types = None
+
+        got_resource = ec.to_api_repr()
+
+        expected_resource = {"sourceFormat": "FORMAT_FOO"}
+        self.assertEqual(got_resource, expected_resource)
+
+        ec.decimal_target_types = None  # No error if unsetting when already unset.
+
 
 def _copy_and_update(d, u):
     d = copy.deepcopy(d)