Skip to content

Commit

Permalink
feat(spanner/spansql): Support multiple joins in query (#4743)
Browse files Browse the repository at this point in the history
* Fixes #3180

#3966 is implementing the same functionality (and more) but the author has lost interest and I don't have the knowledge to take over the unnest parts of his PR, I felt my solution to multiple joins is simple enough the other functionality could be rebased on top of it assuming a new owner takes it on.
  • Loading branch information
philwitty committed Sep 14, 2021
1 parent c6d5189 commit 81a308e
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 62 deletions.
83 changes: 49 additions & 34 deletions spanner/spannertest/integration_test.go
Expand Up @@ -652,12 +652,12 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
) PRIMARY KEY (LastName, OpponentID)`, // TODO: is this right?
// JoinFoo are from https://cloud.google.com/spanner/docs/query-syntax#join_types.
// They aren't consistently named in the docs.
`CREATE TABLE JoinA ( w INT64, x STRING(MAX) ) PRIMARY KEY (w, x)`,
`CREATE TABLE JoinB ( y INT64, z STRING(MAX) ) PRIMARY KEY (y, z)`,
`CREATE TABLE JoinC ( x INT64, y STRING(MAX) ) PRIMARY KEY (x, y)`,
`CREATE TABLE JoinD ( x INT64, z STRING(MAX) ) PRIMARY KEY (x, z)`,
`CREATE TABLE JoinE ( w INT64, x STRING(MAX) ) PRIMARY KEY (w, x)`,
`CREATE TABLE JoinF ( y INT64, z STRING(MAX) ) PRIMARY KEY (y, z)`,
`CREATE TABLE JoinA ( w INT64, x STRING(MAX), a STRING(MAX) ) PRIMARY KEY (w, x)`,
`CREATE TABLE JoinB ( y INT64, z STRING(MAX), b STRING(MAX) ) PRIMARY KEY (y, z)`,
`CREATE TABLE JoinC ( x INT64, y STRING(MAX), c STRING(MAX) ) PRIMARY KEY (x, y)`,
`CREATE TABLE JoinD ( x INT64, z STRING(MAX), d STRING(MAX) ) PRIMARY KEY (x, z)`,
`CREATE TABLE JoinE ( w INT64, x STRING(MAX), e STRING(MAX) ) PRIMARY KEY (w, x)`,
`CREATE TABLE JoinF ( y INT64, z STRING(MAX), f STRING(MAX) ) PRIMARY KEY (y, z)`,
// Some other test tables.
`CREATE TABLE SomeStrings ( i INT64, str STRING(MAX) ) PRIMARY KEY (i)`,
`CREATE TABLE Updateable (
Expand All @@ -676,33 +676,33 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
spanner.Insert("PlayerStats", []string{"LastName", "OpponentID", "PointsScored"}, []interface{}{"Adams", 52, 4}),
spanner.Insert("PlayerStats", []string{"LastName", "OpponentID", "PointsScored"}, []interface{}{"Buchanan", 50, 13}),

spanner.Insert("JoinA", []string{"w", "x"}, []interface{}{1, "a"}),
spanner.Insert("JoinA", []string{"w", "x"}, []interface{}{2, "b"}),
spanner.Insert("JoinA", []string{"w", "x"}, []interface{}{3, "c"}),
spanner.Insert("JoinA", []string{"w", "x"}, []interface{}{3, "d"}),
spanner.Insert("JoinA", []string{"w", "x", "a"}, []interface{}{1, "a", "a1"}),
spanner.Insert("JoinA", []string{"w", "x", "a"}, []interface{}{2, "b", "a2"}),
spanner.Insert("JoinA", []string{"w", "x", "a"}, []interface{}{3, "c", "a3"}),
spanner.Insert("JoinA", []string{"w", "x", "a"}, []interface{}{3, "d", "a4"}),

spanner.Insert("JoinB", []string{"y", "z"}, []interface{}{2, "k"}),
spanner.Insert("JoinB", []string{"y", "z"}, []interface{}{3, "m"}),
spanner.Insert("JoinB", []string{"y", "z"}, []interface{}{3, "n"}),
spanner.Insert("JoinB", []string{"y", "z"}, []interface{}{4, "p"}),
spanner.Insert("JoinB", []string{"y", "z", "b"}, []interface{}{2, "k", "b1"}),
spanner.Insert("JoinB", []string{"y", "z", "b"}, []interface{}{3, "m", "b2"}),
spanner.Insert("JoinB", []string{"y", "z", "b"}, []interface{}{3, "n", "b3"}),
spanner.Insert("JoinB", []string{"y", "z", "b"}, []interface{}{4, "p", "b4"}),

// JoinC and JoinD have the same contents as JoinA and JoinB; they have different column names.
spanner.Insert("JoinC", []string{"x", "y"}, []interface{}{1, "a"}),
spanner.Insert("JoinC", []string{"x", "y"}, []interface{}{2, "b"}),
spanner.Insert("JoinC", []string{"x", "y"}, []interface{}{3, "c"}),
spanner.Insert("JoinC", []string{"x", "y"}, []interface{}{3, "d"}),
spanner.Insert("JoinC", []string{"x", "y", "c"}, []interface{}{1, "a", "c1"}),
spanner.Insert("JoinC", []string{"x", "y", "c"}, []interface{}{2, "b", "c2"}),
spanner.Insert("JoinC", []string{"x", "y", "c"}, []interface{}{3, "c", "c3"}),
spanner.Insert("JoinC", []string{"x", "y", "c"}, []interface{}{3, "d", "c4"}),

spanner.Insert("JoinD", []string{"x", "z"}, []interface{}{2, "k"}),
spanner.Insert("JoinD", []string{"x", "z"}, []interface{}{3, "m"}),
spanner.Insert("JoinD", []string{"x", "z"}, []interface{}{3, "n"}),
spanner.Insert("JoinD", []string{"x", "z"}, []interface{}{4, "p"}),
spanner.Insert("JoinD", []string{"x", "z", "d"}, []interface{}{2, "k", "d1"}),
spanner.Insert("JoinD", []string{"x", "z", "d"}, []interface{}{3, "m", "d2"}),
spanner.Insert("JoinD", []string{"x", "z", "d"}, []interface{}{3, "n", "d3"}),
spanner.Insert("JoinD", []string{"x", "z", "d"}, []interface{}{4, "p", "d4"}),

// JoinE and JoinF are used in the CROSS JOIN test.
spanner.Insert("JoinE", []string{"w", "x"}, []interface{}{1, "a"}),
spanner.Insert("JoinE", []string{"w", "x"}, []interface{}{2, "b"}),
spanner.Insert("JoinE", []string{"w", "x", "e"}, []interface{}{1, "a", "e1"}),
spanner.Insert("JoinE", []string{"w", "x", "e"}, []interface{}{2, "b", "e2"}),

spanner.Insert("JoinF", []string{"y", "z"}, []interface{}{2, "c"}),
spanner.Insert("JoinF", []string{"y", "z"}, []interface{}{3, "d"}),
spanner.Insert("JoinF", []string{"y", "z", "f"}, []interface{}{2, "c", "f1"}),
spanner.Insert("JoinF", []string{"y", "z", "f"}, []interface{}{3, "d", "f2"}),

spanner.Insert("SomeStrings", []string{"i", "str"}, []interface{}{0, "afoo"}),
spanner.Insert("SomeStrings", []string{"i", "str"}, []interface{}{1, "abar"}),
Expand Down Expand Up @@ -1011,7 +1011,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
// Joins.
{
`SELECT * FROM JoinA INNER JOIN JoinB ON JoinA.w = JoinB.y ORDER BY w, x, y, z`,
`SELECT w, x, y, z FROM JoinA INNER JOIN JoinB ON JoinA.w = JoinB.y ORDER BY w, x, y, z`,
nil,
[][]interface{}{
{int64(2), "b", int64(2), "k"},
Expand All @@ -1022,7 +1022,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
},
{
`SELECT * FROM JoinE CROSS JOIN JoinF ORDER BY w, x, y, z`,
`SELECT w, x, y, z FROM JoinE CROSS JOIN JoinF ORDER BY w, x, y, z`,
nil,
[][]interface{}{
{int64(1), "a", int64(2), "c"},
Expand All @@ -1033,7 +1033,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
{
// Same as in docs, but with a weird ORDER BY clause to match the row ordering.
`SELECT * FROM JoinA FULL OUTER JOIN JoinB ON JoinA.w = JoinB.y ORDER BY w IS NULL, w, x, y, z`,
`SELECT w, x, y, z FROM JoinA FULL OUTER JOIN JoinB ON JoinA.w = JoinB.y ORDER BY w IS NULL, w, x, y, z`,
nil,
[][]interface{}{
{int64(1), "a", nil, nil},
Expand All @@ -1047,7 +1047,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
{
// Same as the previous, but using a USING clause instead of an ON clause.
`SELECT * FROM JoinC FULL OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
`SELECT x, y, z FROM JoinC FULL OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
nil,
[][]interface{}{
{int64(1), "a", nil},
Expand All @@ -1060,7 +1060,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
},
{
`SELECT * FROM JoinA LEFT OUTER JOIN JoinB AS B ON JoinA.w = B.y ORDER BY w, x, y, z`,
`SELECT w, x, y, z FROM JoinA LEFT OUTER JOIN JoinB AS B ON JoinA.w = B.y ORDER BY w, x, y, z`,
nil,
[][]interface{}{
{int64(1), "a", nil, nil},
Expand All @@ -1073,7 +1073,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
{
// Same as the previous, but using a USING clause instead of an ON clause.
`SELECT * FROM JoinC LEFT OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
`SELECT x, y, z FROM JoinC LEFT OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
nil,
[][]interface{}{
{int64(1), "a", nil},
Expand All @@ -1086,7 +1086,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
{
// Same as in docs, but with a weird ORDER BY clause to match the row ordering.
`SELECT * FROM JoinA RIGHT OUTER JOIN JoinB AS B ON JoinA.w = B.y ORDER BY w IS NULL, w, x, y, z`,
`SELECT w, x, y, z FROM JoinA RIGHT OUTER JOIN JoinB AS B ON JoinA.w = B.y ORDER BY w IS NULL, w, x, y, z`,
nil,
[][]interface{}{
{int64(2), "b", int64(2), "k"},
Expand All @@ -1098,7 +1098,7 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
},
},
{
`SELECT * FROM JoinC RIGHT OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
`SELECT x, y, z FROM JoinC RIGHT OUTER JOIN JoinD USING (x) ORDER BY x, y, z`,
nil,
[][]interface{}{
{int64(2), "b", "k"},
Expand All @@ -1109,6 +1109,21 @@ func TestIntegration_ReadsAndQueries(t *testing.T) {
{int64(4), nil, "p"},
},
},
{
`SELECT a, b, c FROM JoinA JOIN JoinB ON JoinA.w = JoinB.y JOIN JoinC ON JoinA.w = JoinC.x WHERE JoinA.w = 2 ORDER BY x, y, z`,
nil,
[][]interface{}{
{"a2", "b1", "c2"},
},
},
{
`SELECT a, b, c FROM JoinA LEFT JOIN JoinB ON JoinA.w = JoinB.y JOIN JoinC ON JoinC.x = JoinA.w WHERE JoinA.w = 1 OR JoinA.w = 2 ORDER BY x, y, z`,
nil,
[][]interface{}{
{"a1", nil, "c1"},
{"a2", "b1", "c2"},
},
},
// Check the output of the UPDATE DML.
{
`SELECT id, first, last FROM Updateable ORDER BY id`,
Expand Down
80 changes: 52 additions & 28 deletions spanner/spansql/parser.go
Expand Up @@ -2101,28 +2101,7 @@ func (p *parser) parseSelectList() ([]Expr, []ID, *parseError) {
return list, aliases, nil
}

func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
debugf("parseSelectFrom: %v", p)

/*
from_item: {
table_name [ table_hint_expr ] [ [ AS ] alias ] |
join |
( query_expr ) [ table_hint_expr ] [ [ AS ] alias ] |
field_path |
{ UNNEST( array_expression ) | UNNEST( array_path ) | array_path }
[ table_hint_expr ] [ [ AS ] alias ] [ WITH OFFSET [ [ AS ] alias ] ] |
with_query_name [ table_hint_expr ] [ [ AS ] alias ]
}
join:
from_item [ join_type ] [ join_method ] JOIN [ join_hint_expr ] from_item
[ ON bool_expression | USING ( join_column [, ...] ) ]
join_type:
{ INNER | CROSS | FULL [OUTER] | LEFT [OUTER] | RIGHT [OUTER] }
*/

func (p *parser) parseSelectFromTable() (SelectFrom, *parseError) {
if p.eat("UNNEST") {
if err := p.expect("("); err != nil {
return nil, err
Expand Down Expand Up @@ -2171,19 +2150,22 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
}
sf.Alias = alias
}
return sf, nil
}

func (p *parser) parseSelectFromJoin(lhs SelectFrom) (SelectFrom, *parseError) {
// Look ahead to see if this is a join.
tok := p.next()
if tok.err != nil {
p.back()
return sf, nil
return nil, nil
}
var hashJoin bool // Special case for "HASH JOIN" syntax.
if tok.caseEqual("HASH") {
hashJoin = true
tok = p.next()
if tok.err != nil {
return nil, err
return nil, tok.err
}
}
var jt JoinType
Expand All @@ -2202,13 +2184,13 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
return nil, err
}
} else {
// Not a join
p.back()
return sf, nil
return nil, nil
}

sfj := SelectFromJoin{
Type: jt,
LHS: sf,
LHS: lhs,
}
var hints map[string]string
if hashJoin {
Expand All @@ -2225,11 +2207,13 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
}
sfj.Hints = hints

sfj.RHS, err = p.parseSelectFrom()
rhs, err := p.parseSelectFromTable()
if err != nil {
return nil, err
}

sfj.RHS = rhs

if p.eat("ON") {
sfj.On, err = p.parseBoolExpr()
if err != nil {
Expand All @@ -2249,6 +2233,46 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
return sfj, nil
}

func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
debugf("parseSelectFrom: %v", p)

/*
from_item: {
table_name [ table_hint_expr ] [ [ AS ] alias ] |
join |
( query_expr ) [ table_hint_expr ] [ [ AS ] alias ] |
field_path |
{ UNNEST( array_expression ) | UNNEST( array_path ) | array_path }
[ table_hint_expr ] [ [ AS ] alias ] [ WITH OFFSET [ [ AS ] alias ] ] |
with_query_name [ table_hint_expr ] [ [ AS ] alias ]
}
join:
from_item [ join_type ] [ join_method ] JOIN [ join_hint_expr ] from_item
[ ON bool_expression | USING ( join_column [, ...] ) ]
join_type:
{ INNER | CROSS | FULL [OUTER] | LEFT [OUTER] | RIGHT [OUTER] }
*/
leftHandSide, err := p.parseSelectFromTable()
if err != nil {
return nil, err
}
// Lets keep consuming joins until we no longer find more joins
for {
sfj, err := p.parseSelectFromJoin(leftHandSide)
if err != nil {
return nil, err
}
if sfj == nil {
// There was no join to consume
break
}
leftHandSide = sfj
}
return leftHandSide, nil
}

var joinKeywords = map[string]JoinType{
"INNER": InnerJoin,
"CROSS": CrossJoin,
Expand Down
28 changes: 28 additions & 0 deletions spanner/spansql/sql_test.go
Expand Up @@ -483,6 +483,34 @@ func TestSQL(t *testing.T) {
"SELECT A, B FROM Table1 INNER JOIN Table2 ON Table1.A = Table2.A",
reparseQuery,
},
{
Query{
Select: Select{
List: []Expr{
ID("A"), ID("B"),
},
From: []SelectFrom{
SelectFromJoin{
Type: InnerJoin,
LHS: SelectFromJoin{
Type: InnerJoin,
LHS: SelectFromTable{Table: "Table1"},
RHS: SelectFromTable{Table: "Table2"},
On: ComparisonOp{
LHS: PathExp{"Table1", "A"},
Op: Eq,
RHS: PathExp{"Table2", "A"},
},
},
RHS: SelectFromTable{Table: "Table3"},
Using: []ID{"X"},
},
},
},
},
"SELECT A, B FROM Table1 INNER JOIN Table2 ON Table1.A = Table2.A INNER JOIN Table3 USING (X)",
reparseQuery,
},
}
for _, test := range tests {
sql := test.data.SQL()
Expand Down

0 comments on commit 81a308e

Please sign in to comment.