Skip to content

Commit

Permalink
refactor(samples): load csv table from gcs (#581)
Browse files Browse the repository at this point in the history
  • Loading branch information
Praful Makani committed Jul 22, 2020
1 parent cf253c5 commit 8d254d8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 31 deletions.
Expand Up @@ -20,9 +20,14 @@
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;

// Sample to load CSV data from Cloud Storage into a new BigQuery table
public class LoadCsvFromGcs {
Expand All @@ -32,34 +37,38 @@ public static void runLoadCsvFromGcs() throws Exception {
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
loadCsvFromGcs(datasetName, tableName, sourceUri);
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
loadCsvFromGcs(datasetName, tableName, sourceUri, schema);
}

public static void loadCsvFromGcs(String datasetName, String tableName, String sourceUri)
throws Exception {
public static void loadCsvFromGcs(
String datasetName, String tableName, String sourceUri, Schema schema) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

Table table = bigquery.getTable(datasetName, tableName);
Job loadJob = table.load(FormatOptions.csv(), sourceUri);
// Skip header row in the file.
CsvOptions csvOptions = CsvOptions.newBuilder().setSkipLeadingRows(1).build();

TableId tableId = TableId.of(datasetName, tableName);
LoadJobConfiguration loadConfig =
LoadJobConfiguration.newBuilder(tableId, sourceUri, csvOptions).setSchema(schema).build();

// Load data from a GCS CSV file into the table
Job job = bigquery.create(JobInfo.of(loadConfig));
// Blocks until this load table job completes its execution, either failing or succeeding.
Job completedJob = loadJob.waitFor();

// Check for errors
if (completedJob == null) {
throw new Exception("Job not executed since it no longer exists.");
} else if (completedJob.getStatus().getError() != null) {
// You can also look at queryJob.getStatus().getExecutionErrors() for all
// errors, not just the latest one.
throw new Exception(
"BigQuery was unable to load into the table due to an error: \n"
+ loadJob.getStatus().getError());
job = job.waitFor();
if (job.isDone()) {
System.out.println("CSV from GCS successfully added during load append job");
} else {
System.out.println(
"BigQuery was unable to load into the table due to an error:"
+ job.getStatus().getError());
}
System.out.println("CSV from GCS successfully added during load append job");
} catch (BigQueryException | InterruptedException e) {
System.out.println("Column not added during load append \n" + e.toString());
}
Expand Down
Expand Up @@ -20,8 +20,8 @@
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.UUID;
Expand Down Expand Up @@ -58,15 +58,7 @@ public void setUp() {
System.setOut(out);

// Create a test table
tableName = "loadCsvFromGcs_TEST_" + UUID.randomUUID().toString().replace('-', '_');

Schema schema =
Schema.of(
Field.of("name", LegacySQLTypeName.STRING),
Field.of("post_abbr", LegacySQLTypeName.STRING));

CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, schema);

tableName = "LOAD_CSV_TABLE_FROM_GCS_TEST_" + UUID.randomUUID().toString().substring(0, 8);
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
System.setOut(out);
Expand All @@ -80,9 +72,13 @@ public void tearDown() {
}

@Test
public void loadCsvFromGcs() {
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
  Schema schema =
      Schema.of(
          Field.of("name", StandardSQLTypeName.STRING),
          Field.of("post_abbr", StandardSQLTypeName.STRING));
  // Run the sample against the test dataset/table and verify its success message was printed.
  LoadCsvFromGcs.loadCsvFromGcs(BIGQUERY_DATASET_NAME, tableName, sourceUri, schema);
  assertThat(bout.toString()).contains("CSV from GCS successfully added during load append job");
}
}

0 comments on commit 8d254d8

Please sign in to comment.