Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
feat: sample - add col load append (#112)
* feat: new sample - Add Column Load Append

* feat: add column load append work in progress

* update comment

* updates

* updates

* fix build issue

* update based on comments

* update based on comments

* update exception handling

* update exception handling
  • Loading branch information
stephaniewang526 committed Jan 22, 2020
1 parent eafa6ba commit 833b953
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 0 deletions.
104 changes: 104 additions & 0 deletions samples/src/main/java/com/example/bigquery/AddColumnLoadAppend.java
@@ -0,0 +1,104 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

// [START bigquery_relax_column_load_append]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableId;
import com.google.common.collect.ImmutableList;
import java.util.UUID;

/**
 * Sample showing how to add a new column to an existing BigQuery table while appending rows to it
 * with a CSV load job.
 */
public class AddColumnLoadAppend {

  /**
   * Runs the sample with placeholder values; replace them before running.
   *
   * @throws Exception if the load job no longer exists or finishes with an error
   */
  public static void runAddColumnLoadAppend() throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    // NOTE(review): the integration test passes a Cloud Storage URI (gs://bucket/file.csv) here;
    // presumably the load job expects that form rather than a local path - confirm.
    String sourceUri = "/path/to/file.csv";
    addColumnLoadAppend(datasetName, tableName, sourceUri);
  }

  /**
   * Appends the CSV data at {@code sourceUri} to the given table and, in the same load job, extends
   * the table schema with an additional NULLABLE {@code post_abbr} column.
   *
   * <p>{@link BigQueryException} and {@link InterruptedException} are caught and reported on
   * standard output rather than propagated. When the thread is interrupted, its interrupt flag is
   * restored before returning.
   *
   * @param datasetName name of the dataset that contains the table
   * @param tableName name of the table to append to
   * @param sourceUri URI of the CSV file to load
   * @throws Exception if the load job no longer exists or finishes with an error
   */
  public static void addColumnLoadAppend(String datasetName, String tableName, String sourceUri)
      throws Exception {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableId tableId = TableId.of(datasetName, tableName);

      // Add a new column to a BigQuery table while appending rows via a load job.
      // 'REQUIRED' fields cannot be added to an existing schema, so the additional column must be
      // 'NULLABLE'.
      Schema newSchema =
          Schema.of(
              Field.newBuilder("name", LegacySQLTypeName.STRING)
                  .setMode(Field.Mode.REQUIRED)
                  .build(),
              // Adding below additional column during the load job
              Field.newBuilder("post_abbr", LegacySQLTypeName.STRING)
                  .setMode(Field.Mode.NULLABLE)
                  .build());

      LoadJobConfiguration loadJobConfig =
          LoadJobConfiguration.builder(tableId, sourceUri)
              .setFormatOptions(FormatOptions.csv())
              .setWriteDisposition(WriteDisposition.WRITE_APPEND)
              .setSchema(newSchema)
              .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION))
              .build();

      // Create a job ID so that we can safely retry.
      JobId jobId = JobId.of(UUID.randomUUID().toString());
      Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).setJobId(jobId).build());
      System.out.println(loadJob.getJobId());

      // Load data from the CSV file into the table.
      // Blocks until this load table job completes its execution, either failing or succeeding.
      Job completedJob = loadJob.waitFor();

      // Check for errors
      if (completedJob == null) {
        throw new Exception("Job not executed since it no longer exists.");
      } else if (completedJob.getStatus().getError() != null) {
        // You can also look at completedJob.getStatus().getExecutionErrors() for all
        // errors, not just the latest one.
        // Report the error from the completed job: 'loadJob' is the snapshot taken when the job
        // was created, not the one whose status was just checked.
        throw new Exception(
            "BigQuery was unable to load into the table due to an error: \n"
                + completedJob.getStatus().getError());
      }
      System.out.println("Column successfully added during load append job");
    } catch (BigQueryException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        // Preserve the interrupt so callers further up the stack can still observe it.
        Thread.currentThread().interrupt();
      }
      System.out.println("Column not added during load append \n" + e.toString());
    }
  }
}
// [END bigquery_relax_column_load_append]
@@ -0,0 +1,78 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class AddColumnLoadAppendIT {
private ByteArrayOutputStream bout;
private PrintStream out;

private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");

private static void requireEnvVar(String varName) {
assertNotNull(
"Environment variable " + varName + " is required to perform these tests.",
System.getenv(varName));
}

@BeforeClass
public static void checkRequirements() {
requireEnvVar("BIGQUERY_DATASET_NAME");
}

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
System.setOut(out);
}

@After
public void tearDown() {
System.setOut(null);
}

@Test
public void testAddColumnLoadAppend() throws Exception {
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";

String tableName = "ADD_COLUMN_LOAD_APPEND_TEST";
Schema originalSchema =
Schema.of(
Field.newBuilder("name", LegacySQLTypeName.STRING)
.setMode(Field.Mode.REQUIRED)
.build());

CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, originalSchema);

AddColumnLoadAppend.addColumnLoadAppend(BIGQUERY_DATASET_NAME, tableName, sourceUri);

// Clean up
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
}
}
18 changes: 18 additions & 0 deletions samples/src/test/java/com/example/bigquery/CreateDatasetIT.java
Expand Up @@ -17,18 +17,33 @@
package com.example.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.testing.RemoteBigQueryHelper;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class CreateDatasetIT {
private ByteArrayOutputStream bout;
private PrintStream out;

private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT");

/**
 * Asserts that the environment variable {@code varName} is set, failing with a message that names
 * the missing variable otherwise.
 */
private static void requireEnvVar(String varName) {
  final String failureMessage =
      "Environment variable " + varName + " is required to perform these tests.";
  final String currentValue = System.getenv(varName);
  assertNotNull(failureMessage, currentValue);
}

/** Verifies once, before any test in this class runs, that the required environment is set. */
@BeforeClass
public static void checkRequirements() {
  final String requiredVariable = "GOOGLE_CLOUD_PROJECT";
  requireEnvVar(requiredVariable);
}

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
Expand All @@ -46,5 +61,8 @@ public void testCreateDataset() {
String generatedDatasetName = RemoteBigQueryHelper.generateDatasetName();
CreateDataset.createDataset(generatedDatasetName);
assertThat(bout.toString()).contains(generatedDatasetName + " created successfully");

// Clean up
DeleteDataset.deleteDataset(GOOGLE_CLOUD_PROJECT, generatedDatasetName);
}
}
3 changes: 3 additions & 0 deletions samples/src/test/java/com/example/bigquery/CreateTableIT.java
Expand Up @@ -60,5 +60,8 @@ public void testCreateTable() {
CreateTable.createTable(generatedDatasetName, tableName, schema);

assertThat(bout.toString()).contains("Table created successfully");

// Clean up
DeleteTable.deleteTable(generatedDatasetName, tableName);
}
}

0 comments on commit 833b953

Please sign in to comment.