Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery): support decimalTargetType prioritization #4343

Merged
merged 6 commits into from Jul 30, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions bigquery/external.go
Expand Up @@ -96,6 +96,14 @@ type ExternalDataConfig struct {
// HivePartitioningOptions allows use of Hive partitioning based on the
// layout of objects in Google Cloud Storage.
HivePartitioningOptions *HivePartitioningOptions

// DecimalTargetTypes allows selection of how decimal values are converted when
// processed in BigQuery, subject to the value type having sufficient precision/scale
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
// selected if it is present in the list and if it supports the necessary precision and scale.
//
// StringTargetType supports all precision and scale values.
DecimalTargetTypes []DecimalTargetType
}

func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
Expand All @@ -114,6 +122,9 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
if e.Options != nil {
e.Options.populateExternalDataConfig(&q)
}
for _, v := range e.DecimalTargetTypes {
q.DecimalTargetTypes = append(q.DecimalTargetTypes, string(v))
}
return q
}

Expand All @@ -128,6 +139,9 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi
Schema: bqToSchema(q.Schema),
HivePartitioningOptions: bqToHivePartitioningOptions(q.HivePartitioningOptions),
}
for _, v := range q.DecimalTargetTypes {
e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v))
}
switch {
case q.CsvOptions != nil:
e.Options = bqToCSVOptions(q.CsvOptions)
Expand Down
4 changes: 4 additions & 0 deletions bigquery/external_test.go
Expand Up @@ -87,6 +87,10 @@ func TestExternalDataConfig(t *testing.T) {
EnableListInference: true,
},
},
{
SourceFormat: Parquet,
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
},
} {
q := want.toBQ()
got, err := bqToExternalDataConfig(&q)
Expand Down
14 changes: 8 additions & 6 deletions bigquery/integration_test.go
Expand Up @@ -2208,9 +2208,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {

err := autoTable.Create(ctx, &TableMetadata{
ExternalDataConfig: &ExternalDataConfig{
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
AutoDetect: true,
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
AutoDetect: true,
DecimalTargetTypes: []DecimalTargetType{StringTargetType},
HivePartitioningOptions: &HivePartitioningOptions{
Mode: AutoHivePartitioningMode,
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/",
Expand All @@ -2225,9 +2226,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {

err = customTable.Create(ctx, &TableMetadata{
ExternalDataConfig: &ExternalDataConfig{
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
AutoDetect: true,
SourceFormat: Parquet,
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
AutoDetect: true,
DecimalTargetTypes: []DecimalTargetType{NumericTargetType, StringTargetType},
HivePartitioningOptions: &HivePartitioningOptions{
Mode: CustomHivePartitioningMode,
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/",
Expand Down
28 changes: 28 additions & 0 deletions bigquery/load.go
Expand Up @@ -69,6 +69,14 @@ type LoadConfig struct {
// HivePartitioningOptions allows use of Hive partitioning based on the
// layout of objects in Cloud Storage.
HivePartitioningOptions *HivePartitioningOptions

// DecimalTargetTypes allows selection of how decimal values are converted when
// processed in BigQuery, subject to the value type having sufficient precision/scale
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
// selected if it is present in the list and if it supports the necessary precision and scale.
//
// StringTargetType supports all precision and scale values.
DecimalTargetTypes []DecimalTargetType
}

func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
Expand All @@ -88,6 +96,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
HivePartitioningOptions: l.HivePartitioningOptions.toBQ(),
},
}
for _, v := range l.DecimalTargetTypes {
config.Load.DecimalTargetTypes = append(config.Load.DecimalTargetTypes, string(v))
}
media := l.Src.populateLoadConfig(config.Load)
return config, media
}
Expand All @@ -107,6 +118,9 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
ProjectionFields: q.Load.ProjectionFields,
HivePartitioningOptions: bqToHivePartitioningOptions(q.Load.HivePartitioningOptions),
}
for _, v := range q.Load.DecimalTargetTypes {
lc.DecimalTargetTypes = append(lc.DecimalTargetTypes, DecimalTargetType(v))
}
var fc *FileConfig
if len(q.Load.SourceUris) == 0 {
s := NewReaderSource(nil)
Expand Down Expand Up @@ -168,3 +182,17 @@ func (l *Loader) newJob() (*bq.Job, io.Reader) {
Configuration: config,
}, media
}

// DecimalTargetType is a string-backed identifier for a target type that
// decimal values read from external formats may be converted to. Values of
// this type are used to express a conversion preference.
type DecimalTargetType string

// NumericTargetType indicates that NUMERIC is the preferred type when it is
// supported.
var NumericTargetType = DecimalTargetType("NUMERIC")

// BigNumericTargetType indicates that BIGNUMERIC is the preferred type when
// it is supported.
var BigNumericTargetType = DecimalTargetType("BIGNUMERIC")

// StringTargetType indicates that STRING is the preferred type when it is
// supported.
var StringTargetType = DecimalTargetType("STRING")
17 changes: 17 additions & 0 deletions bigquery/load_test.go
Expand Up @@ -367,6 +367,23 @@ func TestLoad(t *testing.T) {
return j
}(),
},
{
dst: c.Dataset("dataset-id").Table("table-id"),
src: func() *GCSReference {
g := NewGCSReference("uri")
g.SourceFormat = Parquet
return g
}(),
config: LoadConfig{
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
},
want: func() *bq.Job {
j := defaultLoadJob()
j.Configuration.Load.SourceFormat = "PARQUET"
j.Configuration.Load.DecimalTargetTypes = []string{"BIGNUMERIC", "NUMERIC", "STRING"}
return j
}(),
},
}

for i, tc := range testCases {
Expand Down