From c423c91f7689bfc29a94b452842077e2622c621a Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Tue, 14 Jan 2020 13:08:14 -0500 Subject: [PATCH] feat: new sample - load: GCS parquet replace existing table (#101) --- .../bigquery/LoadParquetReplaceTable.java | 89 +++++++++++++++++++ .../bigquery/LoadParquetReplaceTableIT.java | 63 +++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 samples/src/main/java/com/example/bigquery/LoadParquetReplaceTable.java create mode 100644 samples/src/test/java/com/example/bigquery/LoadParquetReplaceTableIT.java diff --git a/samples/src/main/java/com/example/bigquery/LoadParquetReplaceTable.java b/samples/src/main/java/com/example/bigquery/LoadParquetReplaceTable.java new file mode 100644 index 000000000..80f815046 --- /dev/null +++ b/samples/src/main/java/com/example/bigquery/LoadParquetReplaceTable.java @@ -0,0 +1,89 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.bigquery; + +// [START bigquery_load_table_gcs_parquet_truncate] + +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.FormatOptions; +import com.google.cloud.bigquery.Job; +import com.google.cloud.bigquery.JobInfo; +import com.google.cloud.bigquery.JobInfo.WriteDisposition; +import com.google.cloud.bigquery.LoadJobConfiguration; +import com.google.cloud.bigquery.TableId; +import java.math.BigInteger; + +public class LoadParquetReplaceTable { + + public static void runLoadParquetReplaceTable() { + // TODO(developer): Replace these variables before running the sample. + String datasetName = "MY_DATASET_NAME"; + loadParquetReplaceTable(datasetName); + } + + public static void loadParquetReplaceTable(String datasetName) { + try { + // Initialize client that will be used to send requests. This client only needs to be created + // once, and can be reused for multiple requests. + BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); + + // Imports a GCS file into a table and overwrites table data if table already exists. + // This sample loads CSV file at: + // https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.csv + String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"; + TableId tableId = TableId.of(datasetName, "us_states"); + + // For more information on LoadJobConfiguration see: + // https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/LoadJobConfiguration.Builder.html + LoadJobConfiguration configuration = + LoadJobConfiguration.builder(tableId, sourceUri) + .setFormatOptions(FormatOptions.parquet()) + // Set the write disposition to overwrite existing table data. + .setWriteDisposition(WriteDisposition.WRITE_TRUNCATE) + .build(); + + // For more information on Job see: + // https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html + // Load the table + Job job = bigquery.create(JobInfo.of(configuration)); + + // Load data from a GCS parquet file into the table + // Blocks until this load table job completes its execution, either failing or succeeding. + Job completedJob = job.waitFor(); + if (completedJob == null) { + System.out.println("Job not executed since it no longer exists."); + return; + } else if (completedJob.getStatus().getError() != null) { + System.out.println( + "BigQuery was unable to load into the table due to an error: \n" + + job.getStatus().getError()); + return; + } + + // Check number of rows loaded into the table + BigInteger numRows = bigquery.getTable(tableId).getNumRows(); + System.out.printf("Loaded %d rows. \n", numRows); + + System.out.println("GCS parquet overwrote existing table successfully."); + } catch (BigQueryException | InterruptedException e) { + System.out.println("Table extraction job was interrupted. \n" + e.toString()); + } + } +} +// [END bigquery_load_table_gcs_parquet_truncate] diff --git a/samples/src/test/java/com/example/bigquery/LoadParquetReplaceTableIT.java b/samples/src/test/java/com/example/bigquery/LoadParquetReplaceTableIT.java new file mode 100644 index 000000000..1d2bd0550 --- /dev/null +++ b/samples/src/test/java/com/example/bigquery/LoadParquetReplaceTableIT.java @@ -0,0 +1,63 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.bigquery; + +import static com.google.common.truth.Truth.assertThat; +import static junit.framework.TestCase.assertNotNull; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class LoadParquetReplaceTableIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME"); + + private static void requireEnvVar(String varName) { + assertNotNull( + "Environment variable " + varName + " is required to perform these tests.", + System.getenv(varName)); + } + + @BeforeClass + public static void checkRequirements() { + requireEnvVar("BIGQUERY_DATASET_NAME"); + } + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testLoadParquetReplaceTable() { + LoadParquetReplaceTable.loadParquetReplaceTable(BIGQUERY_DATASET_NAME); + assertThat(bout.toString()).contains("GCS parquet overwrote existing table successfully."); + } +}