From 2b01606e38088141b0e92eb136238158f7f2e4be Mon Sep 17 00:00:00 2001 From: Seth Hollyman Date: Wed, 31 Mar 2021 19:21:01 +0000 Subject: [PATCH 1/4] bigquery: export HivePartitioningOptions in load job configurations Fixes: https://github.com/googleapis/google-cloud-go/issues/3876 --- bigquery/load.go | 10 ++++++++++ bigquery/load_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/bigquery/load.go b/bigquery/load.go index 9fa0190685b..0962872429a 100644 --- a/bigquery/load.go +++ b/bigquery/load.go @@ -65,6 +65,10 @@ type LoadConfig struct { // For ingestion from datastore backups, ProjectionFields governs which fields // are projected from the backup. The default behavior projects all fields. ProjectionFields []string + + // HivePartitioningOptions allows use of Hive partitioning based on the + // layout of objects in Google Cloud Storage. + HivePartitioningOptions *HivePartitioningOptions } func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) { @@ -83,6 +87,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) { ProjectionFields: l.ProjectionFields, }, } + if l.HivePartitioningOptions != nil { + config.Load.HivePartitioningOptions = l.HivePartitioningOptions.toBQ() + } media := l.Src.populateLoadConfig(config.Load) return config, media } @@ -111,6 +118,9 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig { fc = &s.FileConfig lc.Src = s } + if q.Load.HivePartitioningOptions != nil { + lc.HivePartitioningOptions = bqToHivePartitioningOptions(q.Load.HivePartitioningOptions) + } bqPopulateFileConfig(q.Load, fc) return lc } diff --git a/bigquery/load_test.go b/bigquery/load_test.go index eb9ec38804d..ca3fe9da75f 100644 --- a/bigquery/load_test.go +++ b/bigquery/load_test.go @@ -342,6 +342,31 @@ func TestLoad(t *testing.T) { return j }(), }, + { + dst: c.Dataset("dataset-id").Table("table-id"), + src: func() *GCSReference { + g := NewGCSReference("uri") + g.SourceFormat = Parquet + return g + 
}(), + config: LoadConfig{ + HivePartitioningOptions: &HivePartitioningOptions{ + Mode: CustomHivePartitioningMode, + SourceURIPrefix: "source_uri", + RequirePartitionFilter: true, + }, + }, + want: func() *bq.Job { + j := defaultLoadJob() + j.Configuration.Load.SourceFormat = "PARQUET" + j.Configuration.Load.HivePartitioningOptions = &bq.HivePartitioningOptions{ + Mode: "CUSTOM", + RequirePartitionFilter: true, + SourceUriPrefix: "source_uri", + } + return j + }(), + }, } for i, tc := range testCases { From f36201ebb10afb14e361113b3b64234209a5beed Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 2 Apr 2021 09:12:42 -0700 Subject: [PATCH 2/4] Update bigquery/load.go Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com> --- bigquery/load.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/load.go b/bigquery/load.go index 0962872429a..963ccab0294 100644 --- a/bigquery/load.go +++ b/bigquery/load.go @@ -67,7 +67,7 @@ type LoadConfig struct { ProjectionFields []string // HivePartitioningOptions allows use of Hive partitioning based on the - // layout of objects in Google Cloud Storage. + // layout of objects in Cloud Storage. 
HivePartitioningOptions *HivePartitioningOptions } From 90232552765c5683a2508b9a364da64fa3a1eddb Mon Sep 17 00:00:00 2001 From: Seth Hollyman Date: Fri, 2 Apr 2021 16:33:33 +0000 Subject: [PATCH 3/4] remove unnecessary nil checks --- bigquery/external.go | 18 ++++++++---------- bigquery/load.go | 4 +--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/bigquery/external.go b/bigquery/external.go index 3f8647e568a..91905c25937 100644 --- a/bigquery/external.go +++ b/bigquery/external.go @@ -121,13 +121,14 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration { func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfig, error) { e := &ExternalDataConfig{ - SourceFormat: DataFormat(q.SourceFormat), - SourceURIs: q.SourceUris, - AutoDetect: q.Autodetect, - Compression: Compression(q.Compression), - IgnoreUnknownValues: q.IgnoreUnknownValues, - MaxBadRecords: q.MaxBadRecords, - Schema: bqToSchema(q.Schema), + SourceFormat: DataFormat(q.SourceFormat), + SourceURIs: q.SourceUris, + AutoDetect: q.Autodetect, + Compression: Compression(q.Compression), + IgnoreUnknownValues: q.IgnoreUnknownValues, + MaxBadRecords: q.MaxBadRecords, + Schema: bqToSchema(q.Schema), + HivePartitioningOptions: bqToHivePartitioningOptions(q.HivePartitioningOptions), } switch { case q.CsvOptions != nil: @@ -141,9 +142,6 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi return nil, err } } - if q.HivePartitioningOptions != nil { - e.HivePartitioningOptions = bqToHivePartitioningOptions(q.HivePartitioningOptions) - } return e, nil } diff --git a/bigquery/load.go b/bigquery/load.go index 963ccab0294..0b0f6c55fb5 100644 --- a/bigquery/load.go +++ b/bigquery/load.go @@ -85,11 +85,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) { SchemaUpdateOptions: l.SchemaUpdateOptions, UseAvroLogicalTypes: l.UseAvroLogicalTypes, ProjectionFields: l.ProjectionFields, + HivePartitioningOptions: 
l.HivePartitioningOptions.toBQ(), }, } - if l.HivePartitioningOptions != nil { - config.Load.HivePartitioningOptions = l.HivePartitioningOptions.toBQ() - } media := l.Src.populateLoadConfig(config.Load) return config, media } From 40fc8d56bb479c799fd37b1b07d90aa9e597dcea Mon Sep 17 00:00:00 2001 From: Seth Hollyman Date: Fri, 2 Apr 2021 17:49:30 +0000 Subject: [PATCH 4/4] really commit all the deltas --- bigquery/external.go | 16 +++++++--------- bigquery/load.go | 4 +--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/bigquery/external.go b/bigquery/external.go index 91905c25937..36a1ab64ac3 100644 --- a/bigquery/external.go +++ b/bigquery/external.go @@ -100,19 +100,17 @@ type ExternalDataConfig struct { func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration { q := bq.ExternalDataConfiguration{ - SourceFormat: string(e.SourceFormat), - SourceUris: e.SourceURIs, - Autodetect: e.AutoDetect, - Compression: string(e.Compression), - IgnoreUnknownValues: e.IgnoreUnknownValues, - MaxBadRecords: e.MaxBadRecords, + SourceFormat: string(e.SourceFormat), + SourceUris: e.SourceURIs, + Autodetect: e.AutoDetect, + Compression: string(e.Compression), + IgnoreUnknownValues: e.IgnoreUnknownValues, + MaxBadRecords: e.MaxBadRecords, + HivePartitioningOptions: e.HivePartitioningOptions.toBQ(), } if e.Schema != nil { q.Schema = e.Schema.toBQ() } - if e.HivePartitioningOptions != nil { - q.HivePartitioningOptions = e.HivePartitioningOptions.toBQ() - } if e.Options != nil { e.Options.populateExternalDataConfig(&q) } diff --git a/bigquery/load.go b/bigquery/load.go index 0b0f6c55fb5..1b2d3c6bfa7 100644 --- a/bigquery/load.go +++ b/bigquery/load.go @@ -105,6 +105,7 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig { SchemaUpdateOptions: q.Load.SchemaUpdateOptions, UseAvroLogicalTypes: q.Load.UseAvroLogicalTypes, ProjectionFields: q.Load.ProjectionFields, + HivePartitioningOptions: bqToHivePartitioningOptions(q.Load.HivePartitioningOptions), } 
var fc *FileConfig if len(q.Load.SourceUris) == 0 { @@ -116,9 +117,6 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig { fc = &s.FileConfig lc.Src = s } - if q.Load.HivePartitioningOptions != nil { - lc.HivePartitioningOptions = bqToHivePartitioningOptions(q.Load.HivePartitioningOptions) - } bqPopulateFileConfig(q.Load, fc) return lc }