feat: add ParquetOptions support and expose it in LoadJobConfiguration and ExternalTableDefinition classes (#1318)

* feat: add ParquetOptions support and expose it in LoadJobConfiguration and ExternalTableDefinition classes

Fixes #1302

* update IT
stephaniewang526 committed May 18, 2021
1 parent 1586b02 commit 72b1715
Showing 6 changed files with 219 additions and 0 deletions.
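In short, this commit adds a ParquetOptions class (a new FormatOptions subtype), teaches FormatOptions.of to recognize the Parquet format, and exposes the options through LoadJobConfiguration and ExternalTableDefinition. A minimal construction sketch follows; the inline comments paraphrase the BigQuery API field descriptions and are not part of the diff itself:

    ParquetOptions parquetOptions =
        ParquetOptions.newBuilder()
            .setEnableListInference(true) // use schema inference specifically for Parquet LIST logical types
            .setEnumAsString(true) // infer Parquet ENUM logical types as STRING rather than BYTES
            .build();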
ExternalTableDefinition.java
@@ -449,6 +449,10 @@ static ExternalTableDefinition fromPb(Table tablePb) {
      builder.setFormatOptions(
          BigtableOptions.fromPb(externalDataConfiguration.getBigtableOptions()));
    }
    if (externalDataConfiguration.getParquetOptions() != null) {
      builder.setFormatOptions(
          ParquetOptions.fromPb(externalDataConfiguration.getParquetOptions()));
    }
    builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords());
    builder.setAutodetect(externalDataConfiguration.getAutodetect());
    if (externalDataConfiguration.getHivePartitioningOptions() != null) {
@@ -491,6 +495,10 @@ static ExternalTableDefinition fromExternalDataConfiguration(
      builder.setFormatOptions(
          BigtableOptions.fromPb(externalDataConfiguration.getBigtableOptions()));
    }
    if (externalDataConfiguration.getParquetOptions() != null) {
      builder.setFormatOptions(
          ParquetOptions.fromPb(externalDataConfiguration.getParquetOptions()));
    }
    if (externalDataConfiguration.getMaxBadRecords() != null) {
      builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords());
    }
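The two ExternalTableDefinition hunks above make the Parquet options survive conversion from the API model. A usage sketch mirroring the integration test further down (the source URI is hypothetical, and getFormatOptions() is assumed to be the existing generic getter on ExternalTableDefinition):

    ParquetOptions parquetOptions =
        ParquetOptions.newBuilder().setEnableListInference(true).build();
    ExternalTableDefinition externalTable =
        ExternalTableDefinition.newBuilder("gs://my-bucket/parquet/*", FormatOptions.parquet())
            .setFormatOptions(parquetOptions)
            .build();
    // With the fromPb branches above, round-tripping through the API model keeps these options.
    ParquetOptions readBack = externalTable.getFormatOptions();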
FormatOptions.java
@@ -126,6 +126,8 @@ public static FormatOptions of(String format) {
      return googleSheets();
    } else if (format.equals(BIGTABLE)) {
      return bigtable();
    } else if (format.equals(PARQUET)) {
      return parquet();
    }
    return new FormatOptions(format);
  }
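With the new branch in FormatOptions.of, the "PARQUET" constant now resolves through the parquet() factory instead of falling through to the generic constructor. A small sketch:

    FormatOptions parquet = FormatOptions.of(FormatOptions.PARQUET);
    assert FormatOptions.PARQUET.equals(parquet.getType());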
LoadJobConfiguration.java
@@ -390,6 +390,10 @@ public CsvOptions getCsvOptions() {
    return formatOptions instanceof CsvOptions ? (CsvOptions) formatOptions : null;
  }

  public ParquetOptions getParquetOptions() {
    return formatOptions instanceof ParquetOptions ? (ParquetOptions) formatOptions : null;
  }

  @Override
  public DatastoreBackupOptions getDatastoreBackupOptions() {
    return formatOptions instanceof DatastoreBackupOptions
@@ -545,6 +549,10 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
        loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows()));
      }
    }
    if (getParquetOptions() != null) {
      ParquetOptions parquetOptions = getParquetOptions();
      loadConfigurationPb.setParquetOptions(parquetOptions.toPb());
    }
    if (schema != null) {
      loadConfigurationPb.setSchema(schema.toPb());
    }
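The LoadJobConfiguration hunks add a typed getter and copy the options onto the underlying load configuration in toPb(). A load-job sketch (dataset, table, and bucket names are hypothetical, and the newBuilder(TableId, String, FormatOptions) overload is assumed to exist as in prior releases):

    TableId destination = TableId.of("my_dataset", "my_table");
    ParquetOptions parquetOptions = ParquetOptions.newBuilder().setEnumAsString(true).build();
    LoadJobConfiguration loadConfig =
        LoadJobConfiguration.newBuilder(destination, "gs://my-bucket/data/*.parquet", parquetOptions)
            .build();
    // getParquetOptions() returns non-null only when the configured format options are ParquetOptions.
    ParquetOptions readBack = loadConfig.getParquetOptions();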
ParquetOptions.java (new file)
@@ -0,0 +1,127 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import com.google.common.base.MoreObjects;
import java.util.Objects;

public class ParquetOptions extends FormatOptions {

  private static final long serialVersionUID = 1992L;

  private final Boolean enableListInference;
  private final Boolean enumAsString;

  public Boolean getEnableListInference() {
    return enableListInference;
  }

  public Boolean getEnumAsString() {
    return enumAsString;
  }

  /** A builder for {@code ParquetOptions} objects. */
  public static final class Builder {
    private Boolean enableListInference;
    private Boolean enumAsString;

    private Builder() {}

    private Builder(ParquetOptions parquetOptions) {
      this.enableListInference = parquetOptions.enableListInference;
      this.enumAsString = parquetOptions.enumAsString;
    }

    public Builder setEnableListInference(Boolean enableListInference) {
      this.enableListInference = enableListInference;
      return this;
    }

    public Builder setEnumAsString(Boolean enumAsString) {
      this.enumAsString = enumAsString;
      return this;
    }

    public ParquetOptions build() {
      return new ParquetOptions(this);
    }
  }

  /** Returns a builder for the {@link ParquetOptions} object. */
  public Builder toBuilder() {
    return new Builder(this);
  }

  ParquetOptions(Builder builder) {
    super(FormatOptions.PARQUET);
    enableListInference = builder.enableListInference;
    enumAsString = builder.enumAsString;
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("enableListInference", enableListInference)
        .add("enumAsString", enumAsString)
        .toString();
  }

  @Override
  public final int hashCode() {
    return Objects.hash(enableListInference, enumAsString);
  }

  @Override
  public final boolean equals(Object obj) {
    if (obj == this) {
      return true;
    }
    if (obj == null || !obj.getClass().equals(ParquetOptions.class)) {
      return false;
    }
    ParquetOptions other = (ParquetOptions) obj;
    return Objects.equals(enableListInference, other.enableListInference)
        && Objects.equals(enumAsString, other.enumAsString);
  }

  /** Returns a builder for a {@link ParquetOptions} object. */
  public static ParquetOptions.Builder newBuilder() {
    return new ParquetOptions.Builder();
  }

  static ParquetOptions fromPb(
      com.google.api.services.bigquery.model.ParquetOptions parquetOptions) {
    Builder builder = newBuilder();
    if (parquetOptions.getEnableListInference() != null) {
      builder.setEnableListInference(parquetOptions.getEnableListInference());
    }
    if (parquetOptions.getEnumAsString() != null) {
      builder.setEnumAsString(parquetOptions.getEnumAsString());
    }
    return builder.build();
  }

  com.google.api.services.bigquery.model.ParquetOptions toPb() {
    com.google.api.services.bigquery.model.ParquetOptions parquetOptions =
        new com.google.api.services.bigquery.model.ParquetOptions();
    if (enableListInference != null) {
      parquetOptions.setEnableListInference(enableListInference);
    }
    if (enumAsString != null) {
      parquetOptions.setEnumAsString(enumAsString);
    }
    return parquetOptions;
  }
}
ParquetOptionsTest.java (new file)
@@ -0,0 +1,67 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertEquals;

import org.junit.Test;

public class ParquetOptionsTest {

  private static final ParquetOptions OPTIONS =
      ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build();

  @Test
  public void testToBuilder() {
    compareParquetOptions(OPTIONS, OPTIONS.toBuilder().build());
    ParquetOptions parquetOptions = OPTIONS.toBuilder().setEnableListInference(true).build();
    assertEquals(true, parquetOptions.getEnableListInference());
    parquetOptions = parquetOptions.toBuilder().setEnumAsString(true).build();
    compareParquetOptions(OPTIONS, parquetOptions);
  }

  @Test
  public void testToBuilderIncomplete() {
    ParquetOptions parquetOptions =
        ParquetOptions.newBuilder().setEnableListInference(true).build();
    assertEquals(parquetOptions, parquetOptions.toBuilder().build());
  }

  @Test
  public void testBuilder() {
    assertEquals(FormatOptions.PARQUET, OPTIONS.getType());
    assertEquals(true, OPTIONS.getEnableListInference());
    assertEquals(true, OPTIONS.getEnumAsString());
  }

  @Test
  public void testToAndFromPb() {
    compareParquetOptions(OPTIONS, ParquetOptions.fromPb(OPTIONS.toPb()));
    ParquetOptions parquetOptions =
        ParquetOptions.newBuilder().setEnableListInference(true).build();
    compareParquetOptions(parquetOptions, ParquetOptions.fromPb(parquetOptions.toPb()));
  }

  private void compareParquetOptions(ParquetOptions expected, ParquetOptions actual) {
    assertThat(expected).isEqualTo(actual);
    assertThat(expected.getEnableListInference()).isEqualTo(actual.getEnableListInference());
    assertThat(expected.getEnumAsString()).isEqualTo(actual.getEnumAsString());
    assertThat(expected.hashCode()).isEqualTo(actual.hashCode());
    assertThat(expected.toString()).isEqualTo(actual.toString());
  }
}
ITBigQueryTest.java
@@ -77,6 +77,7 @@
import com.google.cloud.bigquery.Model;
import com.google.cloud.bigquery.ModelId;
import com.google.cloud.bigquery.ModelInfo;
import com.google.cloud.bigquery.ParquetOptions;
import com.google.cloud.bigquery.PolicyTags;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.QueryParameterValue;
@@ -1840,10 +1841,13 @@ public void testQueryExternalHivePartitioningOptionAutoLayout() throws Interrupt
            .setSourceUriPrefix(sourceUriPrefix)
            .build();
    TableId tableId = TableId.of(DATASET, tableName);
    ParquetOptions parquetOptions =
        ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build();
    ExternalTableDefinition externalTable =
        ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet())
            .setAutodetect(true)
            .setHivePartitioningOptions(hivePartitioningOptions)
            .setFormatOptions(parquetOptions)
            .build();
    assertNotNull(bigquery.create(TableInfo.of(tableId, externalTable)));
    String query =
@@ -1866,6 +1870,8 @@ public void testQueryExternalHivePartitioningOptionCustomLayout() throws Interru
"gs://"
+ CLOUD_SAMPLES_DATA
+ "/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/";
ParquetOptions parquetOptions =
ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build();
HivePartitioningOptions hivePartitioningOptions =
HivePartitioningOptions.newBuilder()
.setMode("CUSTOM")
@@ -1877,6 +1883,7 @@ public void testQueryExternalHivePartitioningOptionCustomLayout() throws Interru
        ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet())
            .setAutodetect(true)
            .setHivePartitioningOptions(hivePartitioningOptions)
            .setFormatOptions(parquetOptions)
            .build();
    assertNotNull(bigquery.create(TableInfo.of(tableId, externalTable)));
    String query =
