From 72b17151c4f3c4a3d298d3791e58c3112a14b4f5 Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Tue, 18 May 2021 14:06:34 -0400 Subject: [PATCH] feat: add ParquetOptions support and expose it in LoadJobConfiguration and ExternalTableDefinition classes (#1318) * feat: add ParquetOptions support and expose it in LoadJobConfiguration and ExternalTableDefinition classes Fixes #1302 * update IT --- .../bigquery/ExternalTableDefinition.java | 8 ++ .../google/cloud/bigquery/FormatOptions.java | 2 + .../cloud/bigquery/LoadJobConfiguration.java | 8 ++ .../google/cloud/bigquery/ParquetOptions.java | 127 ++++++++++++++++++ .../cloud/bigquery/ParquetOptionsTest.java | 67 +++++++++ .../cloud/bigquery/it/ITBigQueryTest.java | 7 + 6 files changed, 219 insertions(+) create mode 100644 google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java create mode 100644 google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java index 5eb0f4c57..1d2cfa293 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java @@ -449,6 +449,10 @@ static ExternalTableDefinition fromPb(Table tablePb) { builder.setFormatOptions( BigtableOptions.fromPb(externalDataConfiguration.getBigtableOptions())); } + if (externalDataConfiguration.getParquetOptions() != null) { + builder.setFormatOptions( + ParquetOptions.fromPb(externalDataConfiguration.getParquetOptions())); + } builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords()); builder.setAutodetect(externalDataConfiguration.getAutodetect()); if (externalDataConfiguration.getHivePartitioningOptions() != null) { @@ -491,6 +495,10 @@ static ExternalTableDefinition 
fromExternalDataConfiguration( builder.setFormatOptions( BigtableOptions.fromPb(externalDataConfiguration.getBigtableOptions())); } + if (externalDataConfiguration.getParquetOptions() != null) { + builder.setFormatOptions( + ParquetOptions.fromPb(externalDataConfiguration.getParquetOptions())); + } if (externalDataConfiguration.getMaxBadRecords() != null) { builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords()); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java index af1878455..92a57fc8f 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java @@ -126,6 +126,8 @@ public static FormatOptions of(String format) { return googleSheets(); } else if (format.equals(BIGTABLE)) { return bigtable(); + } else if (format.equals(PARQUET)) { + return parquet(); } return new FormatOptions(format); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index 0eae67bd6..a317f1285 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -390,6 +390,10 @@ public CsvOptions getCsvOptions() { return formatOptions instanceof CsvOptions ? (CsvOptions) formatOptions : null; } + public ParquetOptions getParquetOptions() { + return formatOptions instanceof ParquetOptions ? 
(ParquetOptions) formatOptions : null; + } + @Override public DatastoreBackupOptions getDatastoreBackupOptions() { return formatOptions instanceof DatastoreBackupOptions @@ -545,6 +549,10 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() { loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows())); } } + if (getParquetOptions() != null) { + ParquetOptions parquetOptions = getParquetOptions(); + loadConfigurationPb.setParquetOptions(parquetOptions.toPb()); + } if (schema != null) { loadConfigurationPb.setSchema(schema.toPb()); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java new file mode 100644 index 000000000..174da41d8 --- /dev/null +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java @@ -0,0 +1,127 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.bigquery; + +import com.google.common.base.MoreObjects; +import java.util.Objects; + +/** Parquet-specific {@link FormatOptions}. Both settings are optional and stay {@code null} until set through the {@link Builder}. */ public class ParquetOptions extends FormatOptions { + + private static final long serialVersionUID = 1992L; + + private final Boolean enableListInference; + private final Boolean enumAsString; + + /** Returns the {@code enableListInference} setting, or {@code null} if it was not set. */ public Boolean getEnableListInference() { + return enableListInference; + } + + /** Returns the {@code enumAsString} setting, or {@code null} if it was not set. */ public Boolean getEnumAsString() { + return enumAsString; + } + + /** A builder for {@code ParquetOptions} objects. */ + public static final class Builder { + private Boolean enableListInference; + private Boolean enumAsString; + + private Builder() {} + + /* Starts from the values of an existing instance; used by toBuilder(). */ private Builder(ParquetOptions parquetOptions) { + this.enableListInference = parquetOptions.enableListInference; + this.enumAsString = parquetOptions.enumAsString; + } + + /** Sets the {@code enableListInference} flag; {@code null} leaves it unset. */ public Builder setEnableListInference(Boolean enableListInference) { + this.enableListInference = enableListInference; + return this; + } + + /** Sets the {@code enumAsString} flag; {@code null} leaves it unset. */ public Builder setEnumAsString(Boolean enumAsString) { + this.enumAsString = enumAsString; + return this; + } + + /** Creates a {@code ParquetOptions} object from this builder's current values. */ public ParquetOptions build() { + return new ParquetOptions(this); + } + } + /** Returns a builder for the {@link ParquetOptions} object.
*/ + public Builder toBuilder() { + return new Builder(this); + } + + /* Copies the builder's values and passes FormatOptions.PARQUET to the superclass as the format type. */ ParquetOptions(Builder builder) { + super(FormatOptions.PARQUET); + enableListInference = builder.enableListInference; + enumAsString = builder.enumAsString; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("enableListInference", enableListInference) + .add("enumAsString", enumAsString) + .toString(); + } + + /** Hashes the two option values; consistent with {@link #equals(Object)}. */ @Override + public final int hashCode() { + return Objects.hash(enableListInference, enumAsString); + } + + /** Returns {@code true} only for another object of exactly this class whose two option values are equal (two {@code null}s count as equal). */ @Override + public final boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj == null || !obj.getClass().equals(ParquetOptions.class)) { + return false; + } + ParquetOptions other = (ParquetOptions) obj; + /* Objects.equals compares the boxed Booleans by value and is null-safe; == would compare object references. */ return Objects.equals(enableListInference, other.enableListInference) && Objects.equals(enumAsString, other.enumAsString); + } + + /** Returns a builder for a {@link ParquetOptions} object. */ + public static ParquetOptions.Builder newBuilder() { + return new ParquetOptions.Builder(); + } + + /** Builds a {@code ParquetOptions} from the API model, copying only the fields that are non-null there. */ static ParquetOptions fromPb( + com.google.api.services.bigquery.model.ParquetOptions parquetOptions) { + Builder builder = newBuilder(); + if (parquetOptions.getEnableListInference() != null) { + builder.setEnableListInference(parquetOptions.getEnableListInference()); + } + if (parquetOptions.getEnumAsString() != null) { + builder.setEnumAsString(parquetOptions.getEnumAsString()); + } + return builder.build(); + } + + /** Converts to the API model, setting only the fields that are non-null on this object. */ com.google.api.services.bigquery.model.ParquetOptions toPb() { + com.google.api.services.bigquery.model.ParquetOptions parquetOptions = + new com.google.api.services.bigquery.model.ParquetOptions(); + if (enableListInference != null) { + parquetOptions.setEnableListInference(enableListInference); + } + if (enumAsString != null) { + parquetOptions.setEnumAsString(enumAsString); + } + return parquetOptions; + } +} diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java
b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java new file mode 100644 index 000000000..8812b2e27 --- /dev/null +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java @@ -0,0 +1,67 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.bigquery; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +/** Unit tests for {@link ParquetOptions}: builder round-trips, getters, and conversion to and from the API model. */ public class ParquetOptionsTest { + + private static final ParquetOptions OPTIONS = + ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build(); + + /** Changes a value through toBuilder(), checks that it took effect, then restores it and expects equality with OPTIONS. */ @Test + public void testToBuilder() { + compareParquetOptions(OPTIONS, OPTIONS.toBuilder().build()); + ParquetOptions parquetOptions = OPTIONS.toBuilder().setEnableListInference(false).build(); + assertEquals(false, parquetOptions.getEnableListInference()); + parquetOptions = parquetOptions.toBuilder().setEnableListInference(true).build(); + compareParquetOptions(OPTIONS, parquetOptions); + } + + /** Round-trips an instance that has only one of the two options set. */ @Test + public void testToBuilderIncomplete() { + ParquetOptions parquetOptions = + ParquetOptions.newBuilder().setEnableListInference(true).build(); + assertEquals(parquetOptions, parquetOptions.toBuilder().build()); + } + + @Test + public void testBuilder() { + assertEquals(FormatOptions.PARQUET, OPTIONS.getType()); + assertEquals(true, OPTIONS.getEnableListInference());
+ assertEquals(true, OPTIONS.getEnumAsString()); + } + + /** Converts to the API model and back, for a fully set and a partially set instance. */ @Test + public void testToAndFromPb() { + compareParquetOptions(OPTIONS, ParquetOptions.fromPb(OPTIONS.toPb())); + ParquetOptions parquetOptions = + ParquetOptions.newBuilder().setEnableListInference(true).build(); + compareParquetOptions(parquetOptions, ParquetOptions.fromPb(parquetOptions.toPb())); + } + + /** Asserts that {@code actual} matches {@code expected} by equals, both getters, hashCode and toString. */ private void compareParquetOptions(ParquetOptions expected, ParquetOptions actual) { + assertThat(actual).isEqualTo(expected); + assertThat(actual.getEnableListInference()).isEqualTo(expected.getEnableListInference()); + assertThat(actual.getEnumAsString()).isEqualTo(expected.getEnumAsString()); + assertThat(actual.hashCode()).isEqualTo(expected.hashCode()); + assertThat(actual.toString()).isEqualTo(expected.toString()); + } +} diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java index 9b1374bc1..eae807bf7 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java @@ -77,6 +77,7 @@ import com.google.cloud.bigquery.Model; import com.google.cloud.bigquery.ModelId; import com.google.cloud.bigquery.ModelInfo; +import com.google.cloud.bigquery.ParquetOptions; import com.google.cloud.bigquery.PolicyTags; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.QueryParameterValue; @@ -1840,10 +1841,13 @@ public void testQueryExternalHivePartitioningOptionAutoLayout() throws Interrupt .setSourceUriPrefix(sourceUriPrefix) .build(); TableId tableId = TableId.of(DATASET, tableName); + ParquetOptions parquetOptions = + ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build(); ExternalTableDefinition externalTable = ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet())
.setAutodetect(true) .setHivePartitioningOptions(hivePartitioningOptions) + .setFormatOptions(parquetOptions) .build(); assertNotNull(bigquery.create(TableInfo.of(tableId, externalTable))); String query = @@ -1866,6 +1870,8 @@ public void testQueryExternalHivePartitioningOptionCustomLayout() throws Interru "gs://" + CLOUD_SAMPLES_DATA + "/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/"; + ParquetOptions parquetOptions = + ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build(); HivePartitioningOptions hivePartitioningOptions = HivePartitioningOptions.newBuilder() .setMode("CUSTOM") @@ -1877,6 +1883,7 @@ public void testQueryExternalHivePartitioningOptionCustomLayout() throws Interru ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet()) .setAutodetect(true) .setHivePartitioningOptions(hivePartitioningOptions) + .setFormatOptions(parquetOptions) .build(); assertNotNull(bigquery.create(TableInfo.of(tableId, externalTable))); String query =