Skip to content

Commit

Permalink
Addressed PR review feedback.
Browse files Browse the repository at this point in the history
- Simplify code, remove duplicate-record example.
- Split all snippets and tests into separate classes.
- Add comments and javadocs links.
- Clean up imports.
- Add region tags.
- Catch only specific exceptions.
- Run linter and fmt-maven-plugin.
  • Loading branch information
VeronicaWasson committed Jan 8, 2021
1 parent 668e1d1 commit c9993be
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 76 deletions.
Expand Up @@ -16,101 +16,77 @@

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_committed]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.*;
import com.google.cloud.bigquery.storage.v1beta2.*;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1beta2.CreateWriteStreamRequest;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.cloud.bigquery.storage.v1beta2.WriteStream;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

public class WriteCommittedStream {

public static Status.Code getStatusCode(StatusRuntimeException e) {
return e.getStatus().getCode();
}

public static void runWriteCommittedStream() {
public static void runWriteCommittedStream()
throws DescriptorValidationException, InterruptedException, IOException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
writeCommittedStream(projectId, datasetName, tableName);
}

public static void writeCommittedStream(String projectId, String datasetName, String tableName) {
public static void writeCommittedStream(String projectId, String datasetName, String tableName)
throws DescriptorValidationException, InterruptedException, IOException {

try (BigQueryWriteClient client = BigQueryWriteClient.create()) {

// Initialize a write stream for the specified table.
// For more information on WriteStream.Type, see:
// https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/WriteStream.Type.html
WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.COMMITTED).build();

TableName parent = TableName.of(projectId, datasetName, tableName);

TableName parentTable = TableName.of(projectId, datasetName, tableName);
CreateWriteStreamRequest createWriteStreamRequest =
CreateWriteStreamRequest.newBuilder()
.setParent(parent.toString())
.setParent(parentTable.toString())
.setWriteStream(stream)
.build();
WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);

// Use the JSON stream writer to send records in JSON format.
// For more information about JsonStreamWriter, see:
// https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JsonStreamWriter.html
try (JsonStreamWriter writer =
JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema(), client)
.build()) {

int offsets[] = {0, 1, 2, 3, 4, 5, 5};
// The last offset is repeated. This will cause an ALREADY_EXISTS error.
// Append 10 JSON objects to the stream.
for (int i = 0; i < 10; i++) {

for (int i : offsets) {
// Create a JSON object that is compatible with the table schema.
JSONObject record = new JSONObject();
record.put("col1", String.format("record %03d", i));
JSONArray jsonArr = new JSONArray();
jsonArr.put(record);

// To detect duplicate records, pass the index as the record offset.
// To disable deduplication, omit the offset or use WriteStream.Type.DEFAULT.
ApiFuture<AppendRowsResponse> future = writer.append(jsonArr, i);
AppendRowsResponse response = future.get();
}

} catch (ExecutionException ex) {
Throwable cause = ex.getCause();
if (cause instanceof StatusRuntimeException) {
System.out.println(
"Failed with status = " + getStatusCode((StatusRuntimeException) cause));
}
throw ex;
}

System.out.println("Appended records successfully.");

} catch (Exception e) {
} catch (ExecutionException e) {
// If the wrapped exception is a StatusRuntimeException, check the state of the operation.
// If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
// https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
System.out.println("Failed to append records. \n" + e.toString());
}
}

public static void writeToDefaultStream(String projectId, String datasetName, String tableName) {

TableName parent = TableName.of(projectId, datasetName, tableName);

BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Table table = bigquery.getTable(datasetName, tableName);
Schema schema = table.getDefinition().getSchema();

try (JsonStreamWriter writer =
JsonStreamWriter.newBuilder(parent.toString(), schema).createDefaultStream().build()) {

for (int i = 0; i < 10; i++) {
JSONObject record = new JSONObject();
record.put("col1", String.format("record %03d", i));
JSONArray jsonArr = new JSONArray();
jsonArr.put(record);

ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
AppendRowsResponse response = future.get();
}
} catch (Exception e) {
System.out.println(e);
}
}
}
// [END bigquerystorage_jsonstreamwriter_committed]
Expand Up @@ -16,14 +16,28 @@

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_pending]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1beta2.*;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BatchCommitWriteStreamsRequest;
import com.google.cloud.bigquery.storage.v1beta2.BatchCommitWriteStreamsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1beta2.CreateWriteStreamRequest;
import com.google.cloud.bigquery.storage.v1beta2.FinalizeWriteStreamResponse;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.cloud.bigquery.storage.v1beta2.WriteStream;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

public class WritePendingStream {

public static void runWritePendingStream() {
public static void runWritePendingStream()
throws DescriptorValidationException, InterruptedException, IOException {

// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
Expand All @@ -32,26 +46,34 @@ public static void runWritePendingStream() {
writePendingStream(projectId, datasetName, tableName);
}

public static void writePendingStream(String projectId, String datasetName, String tableName) {
public static void writePendingStream(String projectId, String datasetName, String tableName)
throws DescriptorValidationException, InterruptedException, IOException {

try (BigQueryWriteClient client = BigQueryWriteClient.create()) {

// Initialize a write stream for the specified table.
// For more information on WriteStream.Type, see:
// https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/WriteStream.Type.html
WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.PENDING).build();

TableName parent = TableName.of(projectId, datasetName, tableName);

TableName parentTable = TableName.of(projectId, datasetName, tableName);
CreateWriteStreamRequest createWriteStreamRequest =
CreateWriteStreamRequest.newBuilder()
.setParent(parent.toString())
.setParent(parentTable.toString())
.setWriteStream(stream)
.build();
WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);

// Use the JSON stream writer to send records in JSON format.
// For more information about JsonStreamWriter, see:
// https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JsonStreamWriter.html
try (JsonStreamWriter writer =
JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema(), client)
.build()) {

// Append 10 JSON objects to the stream.
for (int i = 0; i < 10; i++) {

// Create a JSON object that is compatible with the table schema.
JSONObject record = new JSONObject();
record.put("col1", String.format("batch-record %03d", i));
JSONArray jsonArr = new JSONArray();
Expand All @@ -65,16 +87,21 @@ public static void writePendingStream(String projectId, String datasetName, Stri
System.out.println("Rows written: " + finalizeResponse.getRowCount());
}

// Commit the streams
// Commit the streams.
BatchCommitWriteStreamsRequest commitRequest =
BatchCommitWriteStreamsRequest.newBuilder()
.setParent(parent.toString())
.setParent(parentTable.toString())
.addWriteStreams(writeStream.getName())
.build();
BatchCommitWriteStreamsResponse commitResponse =
client.batchCommitWriteStreams(commitRequest);
} catch (Exception e) {
System.out.println("Appended and committed records successfully.");
} catch (ExecutionException e) {
// If the wrapped exception is a StatusRuntimeException, check the state of the operation.
// If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
// https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
System.out.println(e);
}
}
}
// [START bigquerystorage_jsonstreamwriter_pending]
@@ -0,0 +1,80 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_default]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

public class WriteToDefaultStream {

public static void runWriteToDefaultStream()
throws DescriptorValidationException, InterruptedException, IOException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
writeToDefaultStream(projectId, datasetName, tableName);
}

public static void writeToDefaultStream(String projectId, String datasetName, String tableName)
throws DescriptorValidationException, InterruptedException, IOException {

BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Table table = bigquery.getTable(datasetName, tableName);
TableName parentTable = TableName.of(projectId, datasetName, tableName);
Schema schema = table.getDefinition().getSchema();

// Use the JSON stream writer to send records in JSON format.
// For more information about JsonStreamWriter, see:
// https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JstreamWriter.html
try (JsonStreamWriter writer =
JsonStreamWriter.newBuilder(parentTable.toString(), schema).createDefaultStream().build()) {

// Append 10 JSON objects to the stream.
for (int i = 0; i < 10; i++) {

// Create a JSON object that is compatible with the table schema.
JSONObject record = new JSONObject();
record.put("col1", String.format("record %03d", i));
JSONArray jsonArr = new JSONArray();
jsonArr.put(record);

ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
AppendRowsResponse response = future.get();
}
System.out.println("Appended records successfully.");
} catch (ExecutionException e) {
// If the wrapped exception is a StatusRuntimeException, check the state of the operation.
// If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
// https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
System.out.println("Failed to append records. \n" + e.toString());
}
}
}
// [END bigquerystorage_jsonstreamwriter_default]
Expand Up @@ -16,6 +16,7 @@

package com.example.bigquerystorage;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import java.io.ByteArrayOutputStream;
Expand All @@ -30,7 +31,7 @@
import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
public class WriteAPISampleIT {
public class WriteCommittedStreamIT {

private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT");
private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");
Expand Down Expand Up @@ -73,17 +74,6 @@ public void tearDown() {
public void testWriteCommittedStream() throws Exception {
WriteCommittedStream.writeCommittedStream(
GOOGLE_CLOUD_PROJECT, BIGQUERY_DATASET_NAME, BIGQUERY_TABLE_NAME);
}

@Test
public void testWriteToDefaultStream() throws Exception {
WriteCommittedStream.writeToDefaultStream(
GOOGLE_CLOUD_PROJECT, BIGQUERY_DATASET_NAME, BIGQUERY_TABLE_NAME);
}

@Test
public void testWritePendingStream() throws Exception {
WritePendingStream.writePendingStream(
GOOGLE_CLOUD_PROJECT, BIGQUERY_DATASET_NAME, BIGQUERY_TABLE_NAME);
assertThat(bout.toString()).contains("Appended records successfully.");
}
}

0 comments on commit c9993be

Please sign in to comment.