From 95a27f711a1c7dfdaa16ae5d3c52644769b6fc39 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 30 Jul 2021 10:39:56 -0700 Subject: [PATCH] feat(bigquery): support decimalTargetType prioritization (#4343) * feat(bigquery): support decimalTargetType prioritization Adds support to govern how values from external formats are converted to a corresponding BigQuery type in load jobs and federated table definitions. --- bigquery/external.go | 14 ++++++++++++++ bigquery/external_test.go | 4 ++++ bigquery/integration_test.go | 14 ++++++++------ bigquery/load.go | 28 ++++++++++++++++++++++++++++ bigquery/load_test.go | 17 +++++++++++++++++ 5 files changed, 71 insertions(+), 6 deletions(-) diff --git a/bigquery/external.go b/bigquery/external.go index 140bd446af8..500a2927adf 100644 --- a/bigquery/external.go +++ b/bigquery/external.go @@ -96,6 +96,14 @@ type ExternalDataConfig struct { // HivePartitioningOptions allows use of Hive partitioning based on the // layout of objects in Google Cloud Storage. HivePartitioningOptions *HivePartitioningOptions + + // DecimalTargetTypes allows selection of how decimal values are converted when + // processed in bigquery, subject to the value type having sufficient precision/scale + // to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is + // selected if it is present in the list and if it supports the necessary precision and scale. + // + // StringTargetType supports all precision and scale values. 
+ DecimalTargetTypes []DecimalTargetType } func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration { @@ -114,6 +122,9 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration { if e.Options != nil { e.Options.populateExternalDataConfig(&q) } + for _, v := range e.DecimalTargetTypes { + q.DecimalTargetTypes = append(q.DecimalTargetTypes, string(v)) + } return q } @@ -128,6 +139,9 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi Schema: bqToSchema(q.Schema), HivePartitioningOptions: bqToHivePartitioningOptions(q.HivePartitioningOptions), } + for _, v := range q.DecimalTargetTypes { + e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v)) + } switch { case q.CsvOptions != nil: e.Options = bqToCSVOptions(q.CsvOptions) diff --git a/bigquery/external_test.go b/bigquery/external_test.go index 583948b0981..75aa3ab9093 100644 --- a/bigquery/external_test.go +++ b/bigquery/external_test.go @@ -87,6 +87,10 @@ func TestExternalDataConfig(t *testing.T) { EnableListInference: true, }, }, + { + SourceFormat: Parquet, + DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType}, + }, } { q := want.toBQ() got, err := bqToExternalDataConfig(&q) diff --git a/bigquery/integration_test.go b/bigquery/integration_test.go index 09f72d6be8a..0df0587f1ed 100644 --- a/bigquery/integration_test.go +++ b/bigquery/integration_test.go @@ -2208,9 +2208,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) { err := autoTable.Create(ctx, &TableMetadata{ ExternalDataConfig: &ExternalDataConfig{ - SourceFormat: Parquet, - SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"}, - AutoDetect: true, + SourceFormat: Parquet, + SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"}, + AutoDetect: true, + DecimalTargetTypes: []DecimalTargetType{StringTargetType}, HivePartitioningOptions: 
&HivePartitioningOptions{ Mode: AutoHivePartitioningMode, SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/", @@ -2225,9 +2226,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) { err = customTable.Create(ctx, &TableMetadata{ ExternalDataConfig: &ExternalDataConfig{ - SourceFormat: Parquet, - SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"}, - AutoDetect: true, + SourceFormat: Parquet, + SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"}, + AutoDetect: true, + DecimalTargetTypes: []DecimalTargetType{NumericTargetType, StringTargetType}, HivePartitioningOptions: &HivePartitioningOptions{ Mode: CustomHivePartitioningMode, SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/", diff --git a/bigquery/load.go b/bigquery/load.go index 1b2d3c6bfa7..c8487bc5b55 100644 --- a/bigquery/load.go +++ b/bigquery/load.go @@ -69,6 +69,14 @@ type LoadConfig struct { // HivePartitioningOptions allows use of Hive partitioning based on the // layout of objects in Cloud Storage. HivePartitioningOptions *HivePartitioningOptions + + // DecimalTargetTypes allows selection of how decimal values are converted when + // processed in bigquery, subject to the value type having sufficient precision/scale + // to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is + // selected if it is present in the list and if it supports the necessary precision and scale. + // + // StringTargetType supports all precision and scale values. 
+ DecimalTargetTypes []DecimalTargetType } func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) { @@ -88,6 +96,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) { HivePartitioningOptions: l.HivePartitioningOptions.toBQ(), }, } + for _, v := range l.DecimalTargetTypes { + config.Load.DecimalTargetTypes = append(config.Load.DecimalTargetTypes, string(v)) + } media := l.Src.populateLoadConfig(config.Load) return config, media } @@ -107,6 +118,9 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig { ProjectionFields: q.Load.ProjectionFields, HivePartitioningOptions: bqToHivePartitioningOptions(q.Load.HivePartitioningOptions), } + for _, v := range q.Load.DecimalTargetTypes { + lc.DecimalTargetTypes = append(lc.DecimalTargetTypes, DecimalTargetType(v)) + } var fc *FileConfig if len(q.Load.SourceUris) == 0 { s := NewReaderSource(nil) @@ -168,3 +182,17 @@ func (l *Loader) newJob() (*bq.Job, io.Reader) { Configuration: config, }, media } + +// DecimalTargetType is used to express preference ordering for converting values from external formats. +type DecimalTargetType string + +var ( + // NumericTargetType indicates the preferred type is NUMERIC when supported. + NumericTargetType DecimalTargetType = "NUMERIC" + + // BigNumericTargetType indicates the preferred type is BIGNUMERIC when supported. + BigNumericTargetType DecimalTargetType = "BIGNUMERIC" + + // StringTargetType indicates the preferred type is STRING when supported. 
+ StringTargetType DecimalTargetType = "STRING" +) diff --git a/bigquery/load_test.go b/bigquery/load_test.go index ca3fe9da75f..f6a3a2a2c8d 100644 --- a/bigquery/load_test.go +++ b/bigquery/load_test.go @@ -367,6 +367,23 @@ func TestLoad(t *testing.T) { return j }(), }, + { + dst: c.Dataset("dataset-id").Table("table-id"), + src: func() *GCSReference { + g := NewGCSReference("uri") + g.SourceFormat = Parquet + return g + }(), + config: LoadConfig{ + DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType}, + }, + want: func() *bq.Job { + j := defaultLoadJob() + j.Configuration.Load.SourceFormat = "PARQUET" + j.Configuration.Load.DecimalTargetTypes = []string{"BIGNUMERIC", "NUMERIC", "STRING"} + return j + }(), + }, } for i, tc := range testCases {