New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
docs(samples): add update table using dml query sample #424
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} | ||
{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} | ||
{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} | ||
{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} | ||
{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} | ||
{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} | ||
{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} | ||
{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} | ||
{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} | ||
{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} | ||
{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} | ||
{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} | ||
{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} | ||
{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} | ||
{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} | ||
{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} | ||
{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} | ||
{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} | ||
{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Copyright 2020 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.example.bigquery; | ||
|
||
// [START bigquery_update_with_dml] | ||
import com.google.cloud.bigquery.BigQuery; | ||
import com.google.cloud.bigquery.BigQueryException; | ||
import com.google.cloud.bigquery.BigQueryOptions; | ||
import com.google.cloud.bigquery.FormatOptions; | ||
import com.google.cloud.bigquery.Job; | ||
import com.google.cloud.bigquery.JobId; | ||
import com.google.cloud.bigquery.QueryJobConfiguration; | ||
import com.google.cloud.bigquery.QueryParameterValue; | ||
import com.google.cloud.bigquery.TableDataWriteChannel; | ||
import com.google.cloud.bigquery.TableId; | ||
import com.google.cloud.bigquery.TableResult; | ||
import com.google.cloud.bigquery.WriteChannelConfiguration; | ||
import java.io.IOException; | ||
import java.io.OutputStream; | ||
import java.nio.channels.Channels; | ||
import java.nio.file.FileSystems; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.UUID; | ||
|
||
// Sample to update data in BigQuery tables using DML query | ||
public class UpdateTableDML { | ||
|
||
public static void runUpdateTableDML() throws IOException, InterruptedException { | ||
// TODO(developer): Replace these variables before running the sample. | ||
String datasetName = "MY_DATASET_NAME"; | ||
String tableName = "MY_TABLE_NAME"; | ||
updateTableDML(datasetName, tableName); | ||
} | ||
|
||
public static void updateTableDML(String datasetName, String tableName) | ||
throws IOException, InterruptedException { | ||
try { | ||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. | ||
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); | ||
|
||
// Load JSON file into UserSessions table | ||
TableId tableId = TableId.of(datasetName, tableName); | ||
|
||
WriteChannelConfiguration writeChannelConfiguration = | ||
WriteChannelConfiguration.newBuilder(tableId) | ||
.setFormatOptions(FormatOptions.json()) | ||
.build(); | ||
|
||
// Imports a local JSON file into a table. | ||
Path jsonPath = FileSystems.getDefault().getPath("src/test/resources", "userSessionsData.json"); | ||
|
||
// The location and JobName must be specified; other fields can be auto-detected. | ||
String jobName = "jobId_" + UUID.randomUUID().toString(); | ||
JobId jobId = JobId.newBuilder().setLocation("us").setJob(jobName).build(); | ||
|
||
try (TableDataWriteChannel writer = bigquery.writer(jobId, writeChannelConfiguration); | ||
OutputStream stream = Channels.newOutputStream(writer)) { | ||
Files.copy(jsonPath, stream); | ||
} | ||
|
||
// Get the Job created by the TableDataWriteChannel and wait for it to complete. | ||
Job job = bigquery.getJob(jobId); | ||
Job completedJob = job.waitFor(); | ||
if (completedJob == null) { | ||
System.out.println("Job not executed since it no longer exists."); | ||
return; | ||
} else if (completedJob.getStatus().getError() != null) { | ||
System.out.println( | ||
"BigQuery was unable to load local file to the table due to an error: \n" | ||
+ job.getStatus().getError()); | ||
return; | ||
} | ||
|
||
System.out.println(job.getStatistics().toString() + " userSessionsData json uploaded successfully"); | ||
|
||
// Write a DML query to modify UserSessions table | ||
// To create DML query job to mask the last octet in every row's ip_address column | ||
String dmlQuery = String.format("UPDATE `%s.%s` \n" | ||
+ "SET ip_address = REGEXP_REPLACE(ip_address, r\"(\\.[0-9]+)$\", \".0\")\n" | ||
+ "WHERE TRUE", datasetName, tableName); | ||
|
||
QueryJobConfiguration dmlQueryConfig = | ||
QueryJobConfiguration.newBuilder(dmlQuery).build(); | ||
|
||
// Execute the query. | ||
TableResult result = bigquery.query(dmlQueryConfig); | ||
|
||
// Print the results. | ||
result.iterateAll().forEach(rows -> rows.forEach(row -> System.out.println(row.getValue()))); | ||
|
||
System.out.println("Table updated successfully using DML"); | ||
} catch (BigQueryException e) { | ||
System.out.println("Table update failed \n" + e.toString()); | ||
} | ||
} | ||
} | ||
// [END bigquery_update_with_dml] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Copyright 2020 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.example.bigquery; | ||
|
||
import static com.google.common.truth.Truth.assertThat; | ||
import static junit.framework.TestCase.assertNotNull; | ||
|
||
import com.google.cloud.bigquery.Field; | ||
import com.google.cloud.bigquery.LegacySQLTypeName; | ||
import com.google.cloud.bigquery.Schema; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import java.io.PrintStream; | ||
import java.nio.file.FileSystems; | ||
import java.nio.file.Path; | ||
import java.util.UUID; | ||
import org.junit.After; | ||
import org.junit.Before; | ||
import org.junit.BeforeClass; | ||
import org.junit.Test; | ||
|
||
public class UpdateTableDMLIT { | ||
private ByteArrayOutputStream bout; | ||
private PrintStream out; | ||
|
||
private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME"); | ||
|
||
private static void requireEnvVar(String varName) { | ||
assertNotNull( | ||
"Environment variable " + varName + " is required to perform these tests.", | ||
System.getenv(varName)); | ||
} | ||
|
||
@BeforeClass | ||
public static void checkRequirements() { | ||
requireEnvVar("BIGQUERY_DATASET_NAME"); | ||
} | ||
|
||
@Before | ||
public void setUp() { | ||
bout = new ByteArrayOutputStream(); | ||
out = new PrintStream(bout); | ||
System.setOut(out); | ||
} | ||
|
||
@After | ||
public void tearDown() { | ||
System.setOut(null); | ||
} | ||
|
||
@Test | ||
public void testUpdateTableDML() throws IOException, InterruptedException { | ||
String tableName = "UserSessions_TEST_" + UUID.randomUUID().toString().replace('-', '_'); | ||
Schema schema = | ||
Schema.of( | ||
Field.of("id", LegacySQLTypeName.STRING), | ||
Field.of("user_id", LegacySQLTypeName.STRING), | ||
Field.of("login_time", LegacySQLTypeName.STRING), | ||
Field.of("logout_time", LegacySQLTypeName.STRING), | ||
Field.of("ip_address", LegacySQLTypeName.STRING)); | ||
|
||
CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, schema); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be set up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have always done this in the test method in all the other similar samples There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this one isn't a big deal, but logically it does make sense that "setUp" call since it's not directly related to a test. (consider this one a nit) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
UpdateTableDML.updateTableDML(BIGQUERY_DATASET_NAME, tableName); | ||
|
||
assertThat(bout.toString()).contains("Table updated successfully using DML"); | ||
|
||
// Clean up | ||
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be teardown? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this one also we have been doing this in the method itself... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one is more of a concern - if any exception is thrown before this this step won't be executed. This means failures will leak resources. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} | ||
{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} | ||
{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} | ||
{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} | ||
{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} | ||
{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} | ||
{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} | ||
{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} | ||
{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} | ||
{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are using
getenv
here, but also have requireEnvVar below. Maybe use requireEnvVar
here but have it return a value? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is how we have been handling this in all the samples in BigQuery... is there a new standard?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, this seems to be the same as being used in other places. However, I think maybe this pattern might be improved if we did something like this instead;
This way we only have to specify "BIGQUERY_DATASET_NAME" in one place.
(consider this one a nit)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
okay makes sense - done