Skip to content

Commit

Permalink
feat(bigquery): expose identifiers using a variety of formats (#5017)
Browse files Browse the repository at this point in the history
* feat(bigquery): expose identifiers using a variety of formats

This PR adds an Identifier() method to common BQ resources so
that users can get an identifier that is formatted appropriately
for their use case (legacy SQL, standard SQL, referencing in the
Storage API, etc.).

Existing instances of FullyQualifiedName() have been migrated to the
new method.

Fixes: #1955
  • Loading branch information
shollyman committed Dec 1, 2021
1 parent f58a9f7 commit c9cd984
Show file tree
Hide file tree
Showing 9 changed files with 365 additions and 19 deletions.
20 changes: 20 additions & 0 deletions bigquery/dataset.go
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

"cloud.google.com/go/internal/optional"
Expand Down Expand Up @@ -88,6 +89,25 @@ func (c *Client) DatasetInProject(projectID, datasetID string) *Dataset {
}
}

// Identifier renders the dataset's ID in the requested format.
//
// LegacySQLID produces "project:dataset". StandardSQLID produces
// "project.dataset", with the project portion wrapped in backticks when
// the ProjectID contains a dash (-) character. Any other format yields
// ErrUnknownIdentifierFormat and an empty string.
func (d *Dataset) Identifier(f IdentifierFormat) (string, error) {
	if f == LegacySQLID {
		return fmt.Sprintf("%s:%s", d.ProjectID, d.DatasetID), nil
	}
	if f != StandardSQLID {
		return "", ErrUnknownIdentifierFormat
	}

	// Only the project portion is quoted, and only when it has a dash.
	layout := "%s.%s"
	if strings.Contains(d.ProjectID, "-") {
		layout = "`%s`.%s"
	}
	return fmt.Sprintf(layout, d.ProjectID, d.DatasetID), nil
}

// Create creates a dataset in the BigQuery service. An error will be returned if the
// dataset already exists. Pass in a DatasetMetadata value to configure the dataset.
func (d *Dataset) Create(ctx context.Context, md *DatasetMetadata) (err error) {
Expand Down
60 changes: 60 additions & 0 deletions bigquery/dataset_test.go
Expand Up @@ -476,3 +476,63 @@ func TestConvertAccessEntry(t *testing.T) {
t.Error("got nil, want error")
}
}

// TestDatasetIdentifiers checks Dataset.Identifier against each identifier
// format: the formats the dataset supports must render the expected string,
// and the remaining ones must return an error.
func TestDatasetIdentifiers(t *testing.T) {
	base := &Dataset{
		ProjectID: "p",
		DatasetID: "d",
		c:         nil,
	}

	type identifierCase struct {
		description string
		in          *Dataset
		format      IdentifierFormat
		want        string
		wantErr     bool
	}

	cases := []identifierCase{
		{
			description: "empty format string",
			in:          base,
			format:      "",
			wantErr:     true,
		},
		{
			description: "legacy",
			in:          base,
			format:      LegacySQLID,
			want:        "p:d",
		},
		{
			description: "standard unquoted",
			in:          base,
			format:      StandardSQLID,
			want:        "p.d",
		},
		{
			description: "standard w/quoting",
			in:          &Dataset{ProjectID: "p-p", DatasetID: "d"},
			format:      StandardSQLID,
			want:        "`p-p`.d",
		},
		{
			description: "api resource",
			in:          base,
			format:      StorageAPIResourceID,
			wantErr:     true,
		},
	}

	for _, tc := range cases {
		got, err := tc.in.Identifier(tc.format)

		// Error cases only care that some error came back.
		if tc.wantErr {
			if err == nil {
				t.Errorf("case %q: wanted err, was success", tc.description)
			}
			continue
		}

		if err != nil {
			t.Errorf("case %q: wanted success, got err: %v", tc.description, err)
			continue
		}
		if got != tc.want {
			t.Errorf("case %q: got %s, want %s", tc.description, got, tc.want)
		}
	}
}
38 changes: 22 additions & 16 deletions bigquery/integration_test.go
Expand Up @@ -371,12 +371,12 @@ func TestIntegration_TableCreateView(t *testing.T) {
}
ctx := context.Background()
table := newTable(t, schema)
tableIdentifier, _ := table.Identifier(StandardSQLID)
defer table.Delete(ctx)

// Test that standard SQL views work.
view := dataset.Table("t_view_standardsql")
query := fmt.Sprintf("SELECT APPROX_COUNT_DISTINCT(name) FROM `%s.%s.%s`",
dataset.ProjectID, dataset.DatasetID, table.TableID)
query := fmt.Sprintf("SELECT APPROX_COUNT_DISTINCT(name) FROM %s", tableIdentifier)
err := view.Create(context.Background(), &TableMetadata{
ViewQuery: query,
UseStandardSQL: true,
Expand Down Expand Up @@ -936,10 +936,11 @@ func TestIntegration_DatasetUpdateAccess(t *testing.T) {
// Create a sample UDF so we can verify adding authorized UDFs
routineID := routineIDs.New()
routine := dataset.Routine(routineID)
routineSQLID, _ := routine.Identifier(StandardSQLID)

sql := fmt.Sprintf(`
CREATE FUNCTION `+"`%s`"+`(x INT64) AS (x * 3);`,
routine.FullyQualifiedName())
CREATE FUNCTION %s(x INT64) AS (x * 3);`,
routineSQLID)
if _, _, err := runQuerySQL(ctx, sql); err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1348,13 +1349,14 @@ func TestIntegration_RoutineStoredProcedure(t *testing.T) {
// Define a simple stored procedure via DDL.
routineID := routineIDs.New()
routine := dataset.Routine(routineID)
routineSQLID, _ := routine.Identifier(StandardSQLID)
sql := fmt.Sprintf(`
CREATE OR REPLACE PROCEDURE `+"`%s`"+`(val INT64)
CREATE OR REPLACE PROCEDURE %s(val INT64)
BEGIN
SELECT CURRENT_TIMESTAMP() as ts;
SELECT val * 2 as f2;
END`,
routine.FullyQualifiedName())
routineSQLID)

if _, _, err := runQuerySQL(ctx, sql); err != nil {
t.Fatal(err)
Expand All @@ -1363,8 +1365,8 @@ func TestIntegration_RoutineStoredProcedure(t *testing.T) {

// Invoke the stored procedure.
sql = fmt.Sprintf(`
CALL `+"`%s`"+`(5)`,
routine.FullyQualifiedName())
CALL %s(5)`,
routineSQLID)

q := client.Query(sql)
it, err := q.Read(ctx)
Expand Down Expand Up @@ -2354,8 +2356,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {
}
defer customTable.Delete(ctx)

customTableSQLID, _ := customTable.Identifier(StandardSQLID)

// Issue a test query that prunes based on the custom hive partitioning key, and verify the result is as expected.
sql := fmt.Sprintf("SELECT COUNT(*) as ct FROM `%s`.%s.%s WHERE pkey=\"foo\"", customTable.ProjectID, customTable.DatasetID, customTable.TableID)
sql := fmt.Sprintf("SELECT COUNT(*) as ct FROM %s WHERE pkey=\"foo\"", customTableSQLID)
q := client.Query(sql)
it, err := q.Read(ctx)
if err != nil {
Expand Down Expand Up @@ -3227,10 +3231,10 @@ func TestIntegration_ModelLifecycle(t *testing.T) {
// Create a model via a CREATE MODEL query
modelID := modelIDs.New()
model := dataset.Model(modelID)
modelRef := fmt.Sprintf("%s.%s.%s", dataset.ProjectID, dataset.DatasetID, modelID)
modelSQLID, _ := model.Identifier(StandardSQLID)

sql := fmt.Sprintf(`
CREATE MODEL `+"`%s`"+`
CREATE MODEL %s
OPTIONS (
model_type='linear_reg',
max_iteration=1,
Expand All @@ -3240,7 +3244,7 @@ func TestIntegration_ModelLifecycle(t *testing.T) {
SELECT 'a' AS f1, 2.0 AS label
UNION ALL
SELECT 'b' AS f1, 3.8 AS label
)`, modelRef)
)`, modelSQLID)
if _, _, err := runQuerySQL(ctx, sql); err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -3417,13 +3421,14 @@ func TestIntegration_RoutineComplexTypes(t *testing.T) {

routineID := routineIDs.New()
routine := dataset.Routine(routineID)
routineSQLID, _ := routine.Identifier(StandardSQLID)
sql := fmt.Sprintf(`
CREATE FUNCTION `+"`%s`("+`
CREATE FUNCTION %s(
arr ARRAY<STRUCT<name STRING, val INT64>>
) AS (
(SELECT SUM(IF(elem.name = "foo",elem.val,null)) FROM UNNEST(arr) AS elem)
)`,
routine.FullyQualifiedName())
routineSQLID)
if _, _, err := runQuerySQL(ctx, sql); err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -3480,10 +3485,11 @@ func TestIntegration_RoutineLifecycle(t *testing.T) {
// Create a scalar UDF routine via a CREATE FUNCTION query
routineID := routineIDs.New()
routine := dataset.Routine(routineID)
routineSQLID, _ := routine.Identifier(StandardSQLID)

sql := fmt.Sprintf(`
CREATE FUNCTION `+"`%s`"+`(x INT64) AS (x * 3);`,
routine.FullyQualifiedName())
CREATE FUNCTION %s(x INT64) AS (x * 3);`,
routineSQLID)
if _, _, err := runQuerySQL(ctx, sql); err != nil {
t.Fatal(err)
}
Expand Down
25 changes: 24 additions & 1 deletion bigquery/model.go
Expand Up @@ -17,6 +17,7 @@ package bigquery
import (
"context"
"fmt"
"strings"
"time"

"cloud.google.com/go/internal/optional"
Expand All @@ -41,9 +42,31 @@ type Model struct {
c *Client
}

// Identifier renders the model's ID in the requested format.
//
// LegacySQLID produces "project:dataset.model". StandardSQLID produces
// "project.dataset.model"; when any part of that string contains a dash (-)
// character the entire identifier is wrapped in backticks. Any other format
// yields ErrUnknownIdentifierFormat and an empty string.
func (m *Model) Identifier(f IdentifierFormat) (string, error) {
	if f == LegacySQLID {
		return fmt.Sprintf("%s:%s.%s", m.ProjectID, m.DatasetID, m.ModelID), nil
	}
	if f != StandardSQLID {
		return "", ErrUnknownIdentifierFormat
	}

	// Per https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create#model_name
	// the whole identifier is quoted rather than just the project.
	id := fmt.Sprintf("%s.%s.%s", m.ProjectID, m.DatasetID, m.ModelID)
	if !strings.Contains(id, "-") {
		return id, nil
	}
	return fmt.Sprintf("`%s`", id), nil
}

// FullyQualifiedName returns the ID of the model in projectID:datasetID.modelID format.
//
// NOTE(review): this listing comes from a diff view (24 additions, 1 deletion
// for model.go) and shows the removed fmt.Sprintf return together with the
// added delegation to Identifier(LegacySQLID). As written, the first return
// makes the two statements after it unreachable; only one variant should
// remain in the real file. Both produce the same "%s:%s.%s" layout.
func (m *Model) FullyQualifiedName() string {
return fmt.Sprintf("%s:%s.%s", m.ProjectID, m.DatasetID, m.ModelID)
// The error is discarded because LegacySQLID is one of the formats
// Model.Identifier handles.
s, _ := m.Identifier(LegacySQLID)
return s
}

func (m *Model) toBQ() *bq.ModelReference {
Expand Down
61 changes: 61 additions & 0 deletions bigquery/model_test.go
Expand Up @@ -120,3 +120,64 @@ func TestModelMetadataUpdateToBQ(t *testing.T) {
}
}
}

// TestModelIdentifiers checks Model.Identifier against each identifier
// format: the formats the model supports must render the expected string,
// and the remaining ones must return an error.
func TestModelIdentifiers(t *testing.T) {
	base := &Model{
		ProjectID: "p",
		DatasetID: "d",
		ModelID:   "m",
		c:         nil,
	}

	type identifierCase struct {
		description string
		in          *Model
		format      IdentifierFormat
		want        string
		wantErr     bool
	}

	cases := []identifierCase{
		{
			description: "empty format string",
			in:          base,
			format:      "",
			wantErr:     true,
		},
		{
			description: "legacy",
			in:          base,
			format:      LegacySQLID,
			want:        "p:d.m",
		},
		{
			description: "standard unquoted",
			in:          base,
			format:      StandardSQLID,
			want:        "p.d.m",
		},
		{
			description: "standard w/dash",
			in:          &Model{ProjectID: "p-p", DatasetID: "d", ModelID: "m"},
			format:      StandardSQLID,
			want:        "`p-p.d.m`",
		},
		{
			description: "api resource",
			in:          base,
			format:      StorageAPIResourceID,
			wantErr:     true,
		},
	}

	for _, tc := range cases {
		got, err := tc.in.Identifier(tc.format)

		// Error cases only care that some error came back.
		if tc.wantErr {
			if err == nil {
				t.Errorf("case %q: wanted err, was success", tc.description)
			}
			continue
		}

		if err != nil {
			t.Errorf("case %q: wanted success, got err: %v", tc.description, err)
			continue
		}
		if got != tc.want {
			t.Errorf("case %q: got %s, want %s", tc.description, got, tc.want)
		}
	}
}
20 changes: 19 additions & 1 deletion bigquery/routine.go
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

"cloud.google.com/go/internal/optional"
Expand All @@ -44,9 +45,26 @@ func (r *Routine) toBQ() *bq.RoutineReference {
}
}

// Identifier renders the routine's ID in the requested format.
//
// Only StandardSQLID is handled: it produces "project.dataset.routine", with
// the project portion wrapped in backticks when the ProjectID contains a
// dash (-) character. Any other format yields ErrUnknownIdentifierFormat and
// an empty string.
func (r *Routine) Identifier(f IdentifierFormat) (string, error) {
	if f != StandardSQLID {
		return "", ErrUnknownIdentifierFormat
	}

	// Only the project portion is quoted, and only when it has a dash.
	layout := "%s.%s.%s"
	if strings.Contains(r.ProjectID, "-") {
		layout = "`%s`.%s.%s"
	}
	return fmt.Sprintf(layout, r.ProjectID, r.DatasetID, r.RoutineID), nil
}

// FullyQualifiedName returns an identifier for the routine in project.dataset.routine format.
//
// NOTE(review): this listing comes from a diff view (19 additions, 1 deletion
// for routine.go) and shows the removed fmt.Sprintf return together with the
// added delegation to Identifier(StandardSQLID). As written, the first return
// makes the two statements after it unreachable; only one variant should
// remain in the real file. The two are not equivalent: the Sprintf form never
// quotes, while Identifier(StandardSQLID) wraps a ProjectID containing a dash
// in backticks — confirm which output callers expect.
func (r *Routine) FullyQualifiedName() string {
return fmt.Sprintf("%s.%s.%s", r.ProjectID, r.DatasetID, r.RoutineID)
// The error is discarded because StandardSQLID is the format
// Routine.Identifier handles.
s, _ := r.Identifier(StandardSQLID)
return s
}

// Create creates a Routine in the BigQuery service.
Expand Down

0 comments on commit c9cd984

Please sign in to comment.