Skip to content

Commit

Permalink
feat: new sample - load: GCS parquet replace existing table (#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephaniewang526 committed Jan 14, 2020
1 parent 7fedacd commit c423c91
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 0 deletions.
@@ -0,0 +1,89 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

// [START bigquery_load_table_gcs_parquet_truncate]

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableId;
import java.math.BigInteger;

public class LoadParquetReplaceTable {

public static void runLoadParquetReplaceTable() {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
loadParquetReplaceTable(datasetName);
}

public static void loadParquetReplaceTable(String datasetName) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

// Imports a GCS file into a table and overwrites table data if table already exists.
// This sample loads CSV file at:
// https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.csv
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
TableId tableId = TableId.of(datasetName, "us_states");

// For more information on LoadJobConfiguration see:
// https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/LoadJobConfiguration.Builder.html
LoadJobConfiguration configuration =
LoadJobConfiguration.builder(tableId, sourceUri)
.setFormatOptions(FormatOptions.parquet())
// Set the write disposition to overwrite existing table data.
.setWriteDisposition(WriteDisposition.WRITE_TRUNCATE)
.build();

// For more information on Job see:
// https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html
// Load the table
Job job = bigquery.create(JobInfo.of(configuration));

// Load data from a GCS parquet file into the table
// Blocks until this load table job completes its execution, either failing or succeeding.
Job completedJob = job.waitFor();
if (completedJob == null) {
System.out.println("Job not executed since it no longer exists.");
return;
} else if (completedJob.getStatus().getError() != null) {
System.out.println(
"BigQuery was unable to load into the table due to an error: \n"
+ job.getStatus().getError());
return;
}

// Check number of rows loaded into the table
BigInteger numRows = bigquery.getTable(tableId).getNumRows();
System.out.printf("Loaded %d rows. \n", numRows);

System.out.println("GCS parquet overwrote existing table successfully.");
} catch (BigQueryException | InterruptedException e) {
System.out.println("Table extraction job was interrupted. \n" + e.toString());
}
}
}
// [END bigquery_load_table_gcs_parquet_truncate]
@@ -0,0 +1,63 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class LoadParquetReplaceTableIT {
private ByteArrayOutputStream bout;
private PrintStream out;

private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");

private static void requireEnvVar(String varName) {
assertNotNull(
"Environment variable " + varName + " is required to perform these tests.",
System.getenv(varName));
}

@BeforeClass
public static void checkRequirements() {
requireEnvVar("BIGQUERY_DATASET_NAME");
}

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
System.setOut(out);
}

@After
public void tearDown() {
System.setOut(null);
}

@Test
public void testLoadParquetReplaceTable() {
LoadParquetReplaceTable.loadParquetReplaceTable(BIGQUERY_DATASET_NAME);
assertThat(bout.toString()).contains("GCS parquet overwrote existing table successfully.");
}
}

0 comments on commit c423c91

Please sign in to comment.