Skip to content

Commit

Permalink
feat(bigquery): support decimalTargetType prioritization (#4343)
Browse files Browse the repository at this point in the history
* feat(bigquery): support decimalTargetType prioritization

Adds support to govern how values from external formats are converted
to a corresponding BigQuery type in load jobs and federated table
definitions.
  • Loading branch information
shollyman committed Jul 30, 2021
1 parent c355eb8 commit 95a27f7
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 6 deletions.
14 changes: 14 additions & 0 deletions bigquery/external.go
Expand Up @@ -96,6 +96,14 @@ type ExternalDataConfig struct {
// HivePartitioningOptions allows use of Hive partitioning based on the
// layout of objects in Google Cloud Storage.
HivePartitioningOptions *HivePartitioningOptions

// DecimalTargetTypes allows selection of how decimal values are converted when
// processed in bigquery, subject to the value type having sufficient precision/scale
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
// selected if it is present in the list and if it supports the necessary precision and scale.
//
// StringTargetType supports all precision and scale values.
DecimalTargetTypes []DecimalTargetType
}

func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
Expand All @@ -114,6 +122,9 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
if e.Options != nil {
e.Options.populateExternalDataConfig(&q)
}
for _, v := range e.DecimalTargetTypes {
q.DecimalTargetTypes = append(q.DecimalTargetTypes, string(v))
}
return q
}

Expand All @@ -128,6 +139,9 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi
Schema: bqToSchema(q.Schema),
HivePartitioningOptions: bqToHivePartitioningOptions(q.HivePartitioningOptions),
}
for _, v := range q.DecimalTargetTypes {
e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v))
}
switch {
case q.CsvOptions != nil:
e.Options = bqToCSVOptions(q.CsvOptions)
Expand Down
4 changes: 4 additions & 0 deletions bigquery/external_test.go
Expand Up @@ -87,6 +87,10 @@ func TestExternalDataConfig(t *testing.T) {
EnableListInference: true,
},
},
{
SourceFormat: Parquet,
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
},
} {
q := want.toBQ()
got, err := bqToExternalDataConfig(&q)
Expand Down
14 changes: 8 additions & 6 deletions bigquery/integration_test.go
Expand Up @@ -2208,9 +2208,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {

err := autoTable.Create(ctx, &TableMetadata{
ExternalDataConfig: &ExternalDataConfig{
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
AutoDetect: true,
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
AutoDetect: true,
DecimalTargetTypes: []DecimalTargetType{StringTargetType},
HivePartitioningOptions: &HivePartitioningOptions{
Mode: AutoHivePartitioningMode,
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/",
Expand All @@ -2225,9 +2226,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {

err = customTable.Create(ctx, &TableMetadata{
ExternalDataConfig: &ExternalDataConfig{
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
AutoDetect: true,
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
AutoDetect: true,
DecimalTargetTypes: []DecimalTargetType{NumericTargetType, StringTargetType},
HivePartitioningOptions: &HivePartitioningOptions{
Mode: CustomHivePartitioningMode,
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/",
Expand Down
28 changes: 28 additions & 0 deletions bigquery/load.go
Expand Up @@ -69,6 +69,14 @@ type LoadConfig struct {
// HivePartitioningOptions allows use of Hive partitioning based on the
// layout of objects in Cloud Storage.
HivePartitioningOptions *HivePartitioningOptions

// DecimalTargetTypes allows selection of how decimal values are converted when
// processed in bigquery, subject to the value type having sufficient precision/scale
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
// selected if it is present in the list and if it supports the necessary precision and scale.
//
// StringTargetType supports all precision and scale values.
DecimalTargetTypes []DecimalTargetType
}

func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
Expand All @@ -88,6 +96,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
HivePartitioningOptions: l.HivePartitioningOptions.toBQ(),
},
}
for _, v := range l.DecimalTargetTypes {
config.Load.DecimalTargetTypes = append(config.Load.DecimalTargetTypes, string(v))
}
media := l.Src.populateLoadConfig(config.Load)
return config, media
}
Expand All @@ -107,6 +118,9 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
ProjectionFields: q.Load.ProjectionFields,
HivePartitioningOptions: bqToHivePartitioningOptions(q.Load.HivePartitioningOptions),
}
for _, v := range q.Load.DecimalTargetTypes {
lc.DecimalTargetTypes = append(lc.DecimalTargetTypes, DecimalTargetType(v))
}
var fc *FileConfig
if len(q.Load.SourceUris) == 0 {
s := NewReaderSource(nil)
Expand Down Expand Up @@ -168,3 +182,17 @@ func (l *Loader) newJob() (*bq.Job, io.Reader) {
Configuration: config,
}, media
}

// DecimalTargetType names a candidate type for converting decimal values that
// originate in external formats. A list of these values expresses which
// target types are acceptable for the conversion.
type DecimalTargetType string

// Predefined DecimalTargetType values. Each one's underlying string is the
// corresponding type name: "NUMERIC", "BIGNUMERIC", or "STRING".
var (
	// NumericTargetType requests conversion to NUMERIC when that type is supported.
	NumericTargetType = DecimalTargetType("NUMERIC")

	// BigNumericTargetType requests conversion to BIGNUMERIC when that type is supported.
	BigNumericTargetType = DecimalTargetType("BIGNUMERIC")

	// StringTargetType requests conversion to STRING when that type is supported.
	StringTargetType = DecimalTargetType("STRING")
)
17 changes: 17 additions & 0 deletions bigquery/load_test.go
Expand Up @@ -367,6 +367,23 @@ func TestLoad(t *testing.T) {
return j
}(),
},
{
dst: c.Dataset("dataset-id").Table("table-id"),
src: func() *GCSReference {
g := NewGCSReference("uri")
g.SourceFormat = Parquet
return g
}(),
config: LoadConfig{
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
},
want: func() *bq.Job {
j := defaultLoadJob()
j.Configuration.Load.SourceFormat = "PARQUET"
j.Configuration.Load.DecimalTargetTypes = []string{"BIGNUMERIC", "NUMERIC", "STRING"}
return j
}(),
},
}

for i, tc := range testCases {
Expand Down

0 comments on commit 95a27f7

Please sign in to comment.