From 833b953aad618e88089c7e3487416c5650cc2ccc Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Wed, 22 Jan 2020 12:18:40 -0500 Subject: [PATCH] feat: sample - add col load append (#112) * feat: new sample - Add Column Load Append * feat: add column load append work in progress * update comment * updates * updates * fix build issue * update based on comments * update based on comments * update exception handling * update exception handling --- .../example/bigquery/AddColumnLoadAppend.java | 104 ++++++++++++++++++ .../bigquery/AddColumnLoadAppendIT.java | 78 +++++++++++++ .../com/example/bigquery/CreateDatasetIT.java | 18 +++ .../com/example/bigquery/CreateTableIT.java | 3 + 4 files changed, 203 insertions(+) create mode 100644 samples/src/main/java/com/example/bigquery/AddColumnLoadAppend.java create mode 100644 samples/src/test/java/com/example/bigquery/AddColumnLoadAppendIT.java diff --git a/samples/src/main/java/com/example/bigquery/AddColumnLoadAppend.java b/samples/src/main/java/com/example/bigquery/AddColumnLoadAppend.java new file mode 100644 index 000000000..9587ac945 --- /dev/null +++ b/samples/src/main/java/com/example/bigquery/AddColumnLoadAppend.java @@ -0,0 +1,104 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.example.bigquery;
+
+// [START bigquery_add_column_load_append]
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.BigQueryException;
+import com.google.cloud.bigquery.BigQueryOptions;
+import com.google.cloud.bigquery.Field;
+import com.google.cloud.bigquery.FormatOptions;
+import com.google.cloud.bigquery.Job;
+import com.google.cloud.bigquery.JobId;
+import com.google.cloud.bigquery.JobInfo;
+import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
+import com.google.cloud.bigquery.JobInfo.WriteDisposition;
+import com.google.cloud.bigquery.LegacySQLTypeName;
+import com.google.cloud.bigquery.LoadJobConfiguration;
+import com.google.cloud.bigquery.Schema;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableId;
+import com.google.common.collect.ImmutableList;
+import java.util.UUID;
+
+public class AddColumnLoadAppend {
+
+  public static void runAddColumnLoadAppend() throws Exception {
+    // TODO(developer): Replace these variables before running the sample.
+    String datasetName = "MY_DATASET_NAME";
+    String tableName = "MY_TABLE_NAME";
+    String sourceUri = "/path/to/file.csv";
+    addColumnLoadAppend(datasetName, tableName, sourceUri);
+  }
+
+  public static void addColumnLoadAppend(String datasetName, String tableName, String sourceUri)
+      throws Exception {
+    try {
+      // Initialize client that will be used to send requests. This client only needs to be created
+      // once, and can be reused for multiple requests.
+      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
+
+      TableId tableId = TableId.of(datasetName, tableName);
+      Table table = bigquery.getTable(tableId);
+
+      // Add a new column to a BigQuery table while appending rows via a load job.
+      // 'REQUIRED' fields cannot be added to an existing schema, so the additional column must be
+      // 'NULLABLE'.
+      Schema newSchema =
+          Schema.of(
+              Field.newBuilder("name", LegacySQLTypeName.STRING)
+                  .setMode(Field.Mode.REQUIRED)
+                  .build(),
+              // Adding below additional column during the load job
+              Field.newBuilder("post_abbr", LegacySQLTypeName.STRING)
+                  .setMode(Field.Mode.NULLABLE)
+                  .build());
+
+      LoadJobConfiguration loadJobConfig =
+          LoadJobConfiguration.builder(tableId, sourceUri)
+              .setFormatOptions(FormatOptions.csv())
+              .setWriteDisposition(WriteDisposition.WRITE_APPEND)
+              .setSchema(newSchema)
+              .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION))
+              .build();
+
+      // Create a job ID so that we can safely retry.
+      JobId jobId = JobId.of(UUID.randomUUID().toString());
+      Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).setJobId(jobId).build());
+      System.out.println(loadJob.getJobId());
+
+      // Load data from a GCS CSV file into the table
+      // Blocks until this load table job completes its execution, either failing or succeeding.
+      Job completedJob = loadJob.waitFor();
+
+      // Check for errors
+      if (completedJob == null) {
+        throw new Exception("Job not executed since it no longer exists.");
+      } else if (completedJob.getStatus().getError() != null) {
+        // You can also look at completedJob.getStatus().getExecutionErrors() for all
+        // errors, not just the latest one.
+        throw new Exception(
+            "BigQuery was unable to load into the table due to an error: \n"
+                + completedJob.getStatus().getError());
+      }
+      System.out.println("Column successfully added during load append job");
+    } catch (BigQueryException | InterruptedException e) {
+      System.out.println("Column not added during load append \n" + e.toString());
+    }
+  }
+}
+// [END bigquery_add_column_load_append]
diff --git a/samples/src/test/java/com/example/bigquery/AddColumnLoadAppendIT.java b/samples/src/test/java/com/example/bigquery/AddColumnLoadAppendIT.java
new file mode 100644
index 000000000..34b677888
--- /dev/null
+++ b/samples/src/test/java/com/example/bigquery/AddColumnLoadAppendIT.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.bigquery;
+
+import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.assertNotNull;
+
+import com.google.cloud.bigquery.Field;
+import com.google.cloud.bigquery.LegacySQLTypeName;
+import com.google.cloud.bigquery.Schema;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class AddColumnLoadAppendIT {
+  private ByteArrayOutputStream bout;
+  private PrintStream originalOut;
+
+  private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");
+
+  private static void requireEnvVar(String varName) {
+    assertNotNull(
+        "Environment variable " + varName + " is required to perform these tests.",
+        System.getenv(varName));
+  }
+
+  @BeforeClass
+  public static void checkRequirements() {
+    requireEnvVar("BIGQUERY_DATASET_NAME");
+  }
+
+  @Before
+  public void setUp() {
+    bout = new ByteArrayOutputStream();
+    originalOut = System.out;
+    System.setOut(new PrintStream(bout));
+  }
+
+  @After
+  public void tearDown() {
+    System.setOut(originalOut);
+  }
+
+  @Test
+  public void testAddColumnLoadAppend() throws Exception {
+    String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
+    String tableName = "ADD_COLUMN_LOAD_APPEND_TEST";
+    Schema originalSchema =
+        Schema.of(
+            Field.newBuilder("name", LegacySQLTypeName.STRING)
+                .setMode(Field.Mode.REQUIRED)
+                .build());
+
+    CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, originalSchema);
+    AddColumnLoadAppend.addColumnLoadAppend(BIGQUERY_DATASET_NAME, tableName, sourceUri);
+
+    // Clean up before asserting so a failed assertion does not leak the table
+    DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
+    assertThat(bout.toString()).contains("Column successfully added during load append job");
+  }
+}
diff --git a/samples/src/test/java/com/example/bigquery/CreateDatasetIT.java b/samples/src/test/java/com/example/bigquery/CreateDatasetIT.java
index 32222c603..276eae046 100644
--- a/samples/src/test/java/com/example/bigquery/CreateDatasetIT.java
+++
b/samples/src/test/java/com/example/bigquery/CreateDatasetIT.java @@ -17,18 +17,33 @@ package com.example.bigquery; import static com.google.common.truth.Truth.assertThat; +import static junit.framework.TestCase.assertNotNull; import com.google.cloud.bigquery.testing.RemoteBigQueryHelper; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import org.junit.After; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; public class CreateDatasetIT { private ByteArrayOutputStream bout; private PrintStream out; + private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT"); + + private static void requireEnvVar(String varName) { + assertNotNull( + "Environment variable " + varName + " is required to perform these tests.", + System.getenv(varName)); + } + + @BeforeClass + public static void checkRequirements() { + requireEnvVar("GOOGLE_CLOUD_PROJECT"); + } + @Before public void setUp() { bout = new ByteArrayOutputStream(); @@ -46,5 +61,8 @@ public void testCreateDataset() { String generatedDatasetName = RemoteBigQueryHelper.generateDatasetName(); CreateDataset.createDataset(generatedDatasetName); assertThat(bout.toString()).contains(generatedDatasetName + " created successfully"); + + // Clean up + DeleteDataset.deleteDataset(GOOGLE_CLOUD_PROJECT, generatedDatasetName); } } diff --git a/samples/src/test/java/com/example/bigquery/CreateTableIT.java b/samples/src/test/java/com/example/bigquery/CreateTableIT.java index 2572f6069..497ae4bab 100644 --- a/samples/src/test/java/com/example/bigquery/CreateTableIT.java +++ b/samples/src/test/java/com/example/bigquery/CreateTableIT.java @@ -60,5 +60,8 @@ public void testCreateTable() { CreateTable.createTable(generatedDatasetName, tableName, schema); assertThat(bout.toString()).contains("Table created successfully"); + + // Clean up + DeleteTable.deleteTable(generatedDatasetName, tableName); } }