From 53a56be8b0878edbc4bc5dd67ed63b48c96d3899 Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Tue, 20 Oct 2020 20:15:23 -0400 Subject: [PATCH] docs(samples): create and query Amazon s3 data using external table (#835) * docs(samples): add omni query on external table aws * docs(samples): update code * docs(samples): add user id and external id * docs(samples): fix connection name * docs(samples): update create external table AWS sample * update create dataset AWS sample * nit clean up * update query external table sample * fix checkstyle errors * update based on comments Co-authored-by: Praful Makani --- .../bigquery/CreateExternalTableAws.java | 28 ++++-- .../bigquery/QueryExternalTableAws.java | 68 +++++++++++++ .../example/bigquery/CreateDatasetAwsIT.java | 8 +- .../bigquery/CreateExternalTableAwsIT.java | 53 +++------- .../bigquery/QueryExternalTableAwsIT.java | 97 +++++++++++++++++++ 5 files changed, 203 insertions(+), 51 deletions(-) create mode 100644 samples/snippets/src/main/java/com/example/bigquery/QueryExternalTableAws.java create mode 100644 samples/snippets/src/test/java/com/example/bigquery/QueryExternalTableAwsIT.java diff --git a/samples/snippets/src/main/java/com/example/bigquery/CreateExternalTableAws.java b/samples/snippets/src/main/java/com/example/bigquery/CreateExternalTableAws.java index 02dd669c8..25a749211 100644 --- a/samples/snippets/src/main/java/com/example/bigquery/CreateExternalTableAws.java +++ b/samples/snippets/src/main/java/com/example/bigquery/CreateExternalTableAws.java @@ -16,46 +16,62 @@ package com.example.bigquery; +// [START bigquery_omni_create_external_table] import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; import com.google.cloud.bigquery.BigQueryOptions; import com.google.cloud.bigquery.CsvOptions; import com.google.cloud.bigquery.ExternalTableDefinition; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableInfo; +// Sample to create an external aws table public class CreateExternalTableAws { public static void main(String[] args) { // TODO(developer): Replace these variables before running the sample. + String projectId = "MY_PROJECT_ID"; String datasetName = "MY_DATASET_NAME"; String tableName = "MY_TABLE_NAME"; - // Create a aws connection - // projects/{project_id}/locations/{location_id}/connections/{connection_id} - String connectionId = "MY_CONNECTION_NAME"; + String connectionId = "MY_CONNECTION_ID"; String sourceUri = "s3://your-bucket-name/"; CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build(); + Schema schema = + Schema.of( + Field.of("name", StandardSQLTypeName.STRING), + Field.of("post_abbr", StandardSQLTypeName.STRING)); ExternalTableDefinition externalTableDefinition = ExternalTableDefinition.newBuilder(sourceUri, options) .setConnectionId(connectionId) + .setSchema(schema) .build(); - createExternalTableAws(datasetName, tableName, externalTableDefinition); + createExternalTableAws(projectId, datasetName, tableName, externalTableDefinition); } public static void createExternalTableAws( - String datasetName, String tableName, ExternalTableDefinition externalTableDefinition) { + String projectId, + String datasetName, + String tableName, + ExternalTableDefinition externalTableDefinition) { try { // Initialize client that will be used to send requests. This client only needs to be created // once, and can be reused for multiple requests. BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); - TableId tableId = TableId.of(datasetName, tableName); + TableId tableId = TableId.of(projectId, datasetName, tableName); TableInfo tableInfo = TableInfo.newBuilder(tableId, externalTableDefinition).build(); bigquery.create(tableInfo); System.out.println("Aws external table created successfully"); + + // Clean up + bigquery.delete(TableId.of(projectId, datasetName, tableName)); } catch (BigQueryException e) { System.out.println("Aws external was not created." + e.toString()); } } } +// [END bigquery_omni_create_external_table] diff --git a/samples/snippets/src/main/java/com/example/bigquery/QueryExternalTableAws.java b/samples/snippets/src/main/java/com/example/bigquery/QueryExternalTableAws.java new file mode 100644 index 000000000..0881e5e06 --- /dev/null +++ b/samples/snippets/src/main/java/com/example/bigquery/QueryExternalTableAws.java @@ -0,0 +1,68 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.bigquery; + +// [START bigquery_omni_query_external_aws_s3] +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.CsvOptions; +import com.google.cloud.bigquery.DatasetId; +import com.google.cloud.bigquery.ExternalTableDefinition; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; +import com.google.cloud.bigquery.TableResult; + +// Sample to queries an external data source aws s3 using a permanent table +public class QueryExternalTableAws { + + public static void main(String[] args) throws InterruptedException { + // TODO(developer): Replace these variables before running the sample. + String projectId = "MY_PROJECT_ID"; + String datasetName = "MY_DATASET_NAME"; + String externalTableName = "MY_EXTERNAL_TABLE_NAME"; + // Query to find states starting with 'W' + String query = + String.format( + "SELECT * FROM s%.%s.%s WHERE name LIKE 'W%%'", + projectId, datasetName, externalTableName); + queryExternalTableAws(query); + } + + public static void queryExternalTableAws(String query) throws InterruptedException { + try { + // Initialize client that will be used to send requests. This client only needs to be created + // once, and can be reused for multiple requests. + BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); + + TableResult results = bigquery.query(QueryJobConfiguration.of(query)); + + results + .iterateAll() + .forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString()))); + + System.out.println("Query on aws external permanent table performed successfully."); + } catch (BigQueryException e) { + System.out.println("Query not performed \n" + e.toString()); + } + } +} +// [END bigquery_omni_query_external_aws_s3] diff --git a/samples/snippets/src/test/java/com/example/bigquery/CreateDatasetAwsIT.java b/samples/snippets/src/test/java/com/example/bigquery/CreateDatasetAwsIT.java index 6adb601f3..f4b5cf0f6 100644 --- a/samples/snippets/src/test/java/com/example/bigquery/CreateDatasetAwsIT.java +++ b/samples/snippets/src/test/java/com/example/bigquery/CreateDatasetAwsIT.java @@ -39,7 +39,7 @@ public class CreateDatasetAwsIT { private PrintStream out; private PrintStream originalPrintStream; - private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID"); + private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID"); private static String requireEnvVar(String varName) { String value = System.getenv(varName); @@ -51,7 +51,7 @@ private static String requireEnvVar(String varName) { @BeforeClass public static void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); + requireEnvVar("OMNI_PROJECT_ID"); } @Before @@ -66,7 +66,7 @@ public void setUp() { @After public void tearDown() { // Clean up - DeleteDataset.deleteDataset(PROJECT_ID, datasetName); + DeleteDataset.deleteDataset(OMNI_PROJECT_ID, datasetName); // restores print statements in the original method System.out.flush(); System.setOut(originalPrintStream); @@ -75,7 +75,7 @@ public void tearDown() { @Test public void testCreateDatasetAws() { - CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION); + CreateDatasetAws.createDatasetAws(OMNI_PROJECT_ID, datasetName, LOCATION); assertThat(bout.toString()).contains("Aws dataset created successfully :"); } } diff --git a/samples/snippets/src/test/java/com/example/bigquery/CreateExternalTableAwsIT.java b/samples/snippets/src/test/java/com/example/bigquery/CreateExternalTableAwsIT.java index 16b047398..47a9fa7a8 100644 --- a/samples/snippets/src/test/java/com/example/bigquery/CreateExternalTableAwsIT.java +++ b/samples/snippets/src/test/java/com/example/bigquery/CreateExternalTableAwsIT.java @@ -47,16 +47,14 @@ public class CreateExternalTableAwsIT { private static final String ID = UUID.randomUUID().toString().substring(0, 8); private static final String LOCATION = "aws-us-east-1"; private final Logger log = Logger.getLogger(this.getClass().getName()); - private String datasetName; private String tableName; - private String connectionName; private ByteArrayOutputStream bout; private PrintStream out; private PrintStream originalPrintStream; - private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID"); - private static final String AWS_ACCOUNT_ID = requireEnvVar("AWS_ACCOUNT_ID"); - private static final String AWS_ROLE_ID = requireEnvVar("AWS_ROLE_ID"); + private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID"); + private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME"); + private static final String AWS_READ_CONNECTION_ID = requireEnvVar("AWS_READ_CONNECTION_ID"); private static String requireEnvVar(String varName) { String value = System.getenv(varName); @@ -69,49 +67,21 @@ private static String requireEnvVar(String varName) { @BeforeClass public static void checkRequirements() { requireEnvVar("OMNI_PROJECT_ID"); - requireEnvVar("AWS_ACCOUNT_ID"); - requireEnvVar("AWS_ROLE_ID"); + requireEnvVar("OMNI_DATASET_NAME"); + requireEnvVar("AWS_READ_CONNECTION_ID"); } @Before - public void setUp() throws IOException { - datasetName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID; + public void setUp() { tableName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID; - connectionName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID; bout = new ByteArrayOutputStream(); out = new PrintStream(bout); originalPrintStream = System.out; System.setOut(out); - // create a temporary aws connection - try (ConnectionServiceClient client = ConnectionServiceClient.create()) { - LocationName parent = LocationName.of(PROJECT_ID, LOCATION); - String iamRoleId = String.format("arn:aws:iam::%s:role/%s", AWS_ACCOUNT_ID, AWS_ROLE_ID); - AwsCrossAccountRole role = AwsCrossAccountRole.newBuilder().setIamRoleId(iamRoleId).build(); - AwsProperties awsProperties = AwsProperties.newBuilder().setCrossAccountRole(role).build(); - Connection connection = Connection.newBuilder().setAws(awsProperties).build(); - CreateConnectionRequest request = - CreateConnectionRequest.newBuilder() - .setParent(parent.toString()) - .setConnection(connection) - .setConnectionId(connectionName) - .build(); - connectionName = client.createConnection(request).getName(); - } - // create a temporary dataset - CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION); } @After - public void tearDown() throws IOException { - // delete a temporary aws connection - try (ConnectionServiceClient client = ConnectionServiceClient.create()) { - DeleteConnectionRequest request = - DeleteConnectionRequest.newBuilder().setName(connectionName).build(); - client.deleteConnection(request); - } - // Clean up - DeleteTable.deleteTable(datasetName, tableName); - DeleteDataset.deleteDataset(PROJECT_ID, datasetName); + public void tearDown() { // restores print statements in the original method System.out.flush(); System.setOut(originalPrintStream); @@ -120,18 +90,19 @@ public void tearDown() throws IOException { @Test public void testCreateExternalTableAws() { - String sourceUri = "s3://cloud-samples-tests/us-states.csv"; + String sourceUri = "s3://omni-samples-test-bucket/us-states.csv"; Schema schema = Schema.of( Field.of("name", StandardSQLTypeName.STRING), Field.of("post_abbr", StandardSQLTypeName.STRING)); CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build(); - ExternalTableDefinition externalTable = + ExternalTableDefinition externalTableDefinition = ExternalTableDefinition.newBuilder(sourceUri, options) - .setConnectionId(connectionName) + .setConnectionId(AWS_READ_CONNECTION_ID) .setSchema(schema) .build(); - CreateExternalTableAws.createExternalTableAws(datasetName, tableName, externalTable); + CreateExternalTableAws.createExternalTableAws( + OMNI_PROJECT_ID, OMNI_DATASET_NAME, tableName, externalTableDefinition); assertThat(bout.toString()).contains("Aws external table created successfully"); } } diff --git a/samples/snippets/src/test/java/com/example/bigquery/QueryExternalTableAwsIT.java b/samples/snippets/src/test/java/com/example/bigquery/QueryExternalTableAwsIT.java new file mode 100644 index 000000000..3ef76ebd7 --- /dev/null +++ b/samples/snippets/src/test/java/com/example/bigquery/QueryExternalTableAwsIT.java @@ -0,0 +1,97 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.bigquery; + +import static com.google.common.truth.Truth.assertThat; +import static junit.framework.TestCase.assertNotNull; + +import com.google.cloud.bigquery.CsvOptions; +import com.google.cloud.bigquery.ExternalTableDefinition; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.connection.v1.AwsCrossAccountRole; +import com.google.cloud.bigquery.connection.v1.AwsProperties; +import com.google.cloud.bigquery.connection.v1.Connection; +import com.google.cloud.bigquery.connection.v1.CreateConnectionRequest; +import com.google.cloud.bigquery.connection.v1.DeleteConnectionRequest; +import com.google.cloud.bigquery.connection.v1.LocationName; +import com.google.cloud.bigqueryconnection.v1.ConnectionServiceClient; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.UUID; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class QueryExternalTableAwsIT { + + private final Logger log = Logger.getLogger(this.getClass().getName()); + private ByteArrayOutputStream bout; + private PrintStream out; + private PrintStream originalPrintStream; + + private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID"); + private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME"); + private static final String OMNI_EXTERNAL_TABLE_NAME = requireEnvVar("OMNI_EXTERNAL_TABLE_NAME"); + + private static String requireEnvVar(String varName) { + String value = System.getenv(varName); + assertNotNull( + "Environment variable " + varName + " is required to perform these tests.", + System.getenv(varName)); + return value; + } + + @BeforeClass + public static void checkRequirements() { + requireEnvVar("OMNI_PROJECT_ID"); + requireEnvVar("OMNI_DATASET_NAME"); + requireEnvVar("OMNI_EXTERNAL_TABLE_NAME"); + } + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + originalPrintStream = System.out; + System.setOut(out); + } + + @After + public void tearDown() { + // restores print statements in the original method + System.out.flush(); + System.setOut(originalPrintStream); + log.log(Level.INFO, bout.toString()); + } + + @Test + public void testQueryExternalTableAws() throws InterruptedException { + String query = + String.format( + "SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'", + OMNI_PROJECT_ID, OMNI_DATASET_NAME, OMNI_EXTERNAL_TABLE_NAME); + QueryExternalTableAws.queryExternalTableAws(query); + assertThat(bout.toString()) + .contains("Query on aws external permanent table performed successfully."); + } +}