Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): support join USING #21916

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions hogql_parser/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,9 @@ The three pages below are must-reads though. They're key to writing production-r
```bash
pytest posthog/hogql/
```

## How to install dependencies on Ubuntu

Antlr runtime provided in Ubuntu packages might be of an older version, which results in compilation errors.

In that case run commands from [this step](https://github.com/PostHog/posthog/blob/4fba6a63e351131fdb27b85e7ba436446fdb3093/.github/actions/run-backend-tests/action.yml#L100).
5 changes: 1 addition & 4 deletions hogql_parser/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,9 +857,6 @@ class HogQLParseTreeConverter : public HogQLParserBaseVisitor {
VISIT_UNSUPPORTED(JoinOpCross)

VISIT(JoinConstraintClause) {
if (ctx->USING()) {
throw NotImplementedError("Unsupported: JOIN ... USING");
}
PyObject* column_expr_list = visitAsPyObject(ctx->columnExprList());
Py_ssize_t column_expr_list_size = PyList_Size(column_expr_list);
if (column_expr_list_size == -1) {
Expand All @@ -872,7 +869,7 @@ class HogQLParseTreeConverter : public HogQLParserBaseVisitor {
}
PyObject* expr = Py_NewRef(PyList_GET_ITEM(column_expr_list, 0));
Py_DECREF(column_expr_list);
RETURN_NEW_AST_NODE("JoinConstraint", "{s:N}", "expr", expr);
RETURN_NEW_AST_NODE("JoinConstraint", "{s:N,s:s}", "expr", expr, "constraint_type", ctx->USING() ? "USING" : "ON");
}

VISIT(SampleClause) {
Expand Down
2 changes: 1 addition & 1 deletion hogql_parser/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

setup(
name="hogql_parser",
version="1.0.7",
version="1.0.8",
url="https://github.com/PostHog/posthog/tree/master/hogql_parser",
author="PostHog Inc.",
author_email="hey@posthog.com",
Expand Down
11 changes: 7 additions & 4 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "JoinExpr | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Statement is unreachable [unreachable]
posthog/hogql/resolver.py:0: error: Item "None" of "JoinExpr | None" has no attribute "join_type" [union-attr]
posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "JoinConstraint | None") [assignment]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "JoinConstraint | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SampleExpr | None") [assignment]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "SampleExpr | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Visitor" has incompatible type "SelectQuery | SelectUnionQuery | Field | None"; expected "AST" [arg-type]
Expand All @@ -230,8 +228,6 @@ posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression
posthog/hogql/resolver.py:0: error: Argument 1 to "append" of "list" has incompatible type "BaseTableType | SelectUnionQueryType | SelectQueryType | SelectQueryAliasType | SelectViewType | None"; expected "SelectQueryType | SelectUnionQueryType" [arg-type]
posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "JoinExpr | None") [assignment]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "JoinExpr | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "JoinConstraint | None") [assignment]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "JoinConstraint | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SampleExpr | None") [assignment]
posthog/hogql/resolver.py:0: error: Argument 1 to "visit" of "Resolver" has incompatible type "SampleExpr | None"; expected "Expr" [arg-type]
posthog/hogql/resolver.py:0: error: Argument 2 to "convert_hogqlx_tag" has incompatible type "int | None"; expected "int" [arg-type]
Expand Down Expand Up @@ -514,6 +510,13 @@ posthog/hogql/test/test_resolver.py:0: error: Incompatible types in assignment (
posthog/hogql/test/test_resolver.py:0: error: "TestResolver" has no attribute "snapshot" [attr-defined]
posthog/hogql/test/test_resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery") [assignment]
posthog/hogql/test/test_resolver.py:0: error: "TestResolver" has no attribute "snapshot" [attr-defined]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinExpr | None" has no attribute "next_join" [union-attr]
Copy link
Contributor Author

@nikitaevg nikitaevg May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it doesn't make sense and will reduce readability if I asserted in tests that these fields are not None, but I could do it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup that's fine. I agree that additional asserts wouldn't help code quality.

posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "constraint" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinConstraint | Any | None" has no attribute "constraint_type" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinConstraint | Any | None" has no attribute "expr" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinExpr | None" has no attribute "next_join" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "constraint" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Item "None" of "JoinConstraint | Any | None" has no attribute "constraint_type" [union-attr]
posthog/hogql/test/test_resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery") [assignment]
posthog/hogql/test/test_resolver.py:0: error: "TestResolver" has no attribute "snapshot" [attr-defined]
posthog/hogql/test/test_resolver.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery") [assignment]
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ class Call(Expr):
@dataclass(kw_only=True)
class JoinConstraint(Expr):
expr: Expr
constraint_type: Literal["ON", "USING"]


@dataclass(kw_only=True)
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/event_sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ def join_with_events_table_session_duration(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "$session_id"]),
right=ast.Field(chain=[to_table, "id"]),
)
),
constraint_type="ON",
)

return join_expr
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def join_with_group_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, f"$group_{group_index}"]),
right=ast.Field(chain=[to_table, "key"]),
)
),
constraint_type="ON",
)

return join_expr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def join_with_person_distinct_id_overrides_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "distinct_id"]),
right=ast.Field(chain=[to_table, "distinct_id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/person_distinct_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def join_with_person_distinct_ids_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "distinct_id"]),
right=ast.Field(chain=[to_table, "distinct_id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/person_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def join_with_person_overrides_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "event_person_id"]),
right=ast.Field(chain=[to_table, "old_person_id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def join_with_persons_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "person_id"]),
right=ast.Field(chain=[to_table, "id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/persons_pdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def persons_pdi_join(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "id"]),
right=ast.Field(chain=[to_table, "person_id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql/database/schema/session_replay_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

select_fields: list[ast.Expr] = [
ast.Alias(alias=name, expr=ast.Field(chain=chain)) for name, chain in requested_fields.items()
]

Check failure on line 39 in posthog/hogql/database/schema/session_replay_events.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Missing named argument "constraint_type" for "JoinConstraint"
select_query = SelectQuery(
select=select_fields,
select_from=ast.JoinExpr(table=ast.Field(chain=["events"])),
Expand All @@ -51,7 +51,8 @@
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "session_id"]),
right=ast.Field(chain=[to_table, "$session_id"]),
)
),
constraint_type="ON",
)

return join_expr
Expand Down Expand Up @@ -103,7 +104,8 @@
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "session_id"]),
right=ast.Field(chain=[to_table, "log_source_id"]),
)
),
constraint_type="ON",
)

return join_expr
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ def join_events_table_to_sessions_table(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[from_table, "$session_id"]),
right=ast.Field(chain=[to_table, "session_id"]),
)
),
constraint_type="ON",
)
return join_expr

Expand Down
4 changes: 1 addition & 3 deletions posthog/hogql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,12 +366,10 @@ def visitJoinOpCross(self, ctx: HogQLParser.JoinOpCrossContext):
raise NotImplementedError(f"Unsupported node: JoinOpCross")

def visitJoinConstraintClause(self, ctx: HogQLParser.JoinConstraintClauseContext):
if ctx.USING():
raise NotImplementedError(f"Unsupported: JOIN ... USING")
column_expr_list = self.visit(ctx.columnExprList())
if len(column_expr_list) != 1:
raise NotImplementedError(f"Unsupported: JOIN ... ON with multiple expressions")
return ast.JoinConstraint(expr=column_expr_list[0])
return ast.JoinConstraint(expr=column_expr_list[0], constraint_type="USING" if ctx.USING() else "ON")

def visitSampleClause(self, ctx: HogQLParser.SampleClauseContext):
ratio_expressions = ctx.ratioExpr()
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def visit_join_expr(self, node: ast.JoinExpr) -> JoinExprResponse:
join_strings.append(sample_clause)

if node.constraint is not None:
join_strings.append(f"ON {self.visit(node.constraint)}")
join_strings.append(f"{node.constraint.constraint_type} {self.visit(node.constraint)}")

return JoinExprResponse(printed_sql=" ".join(join_strings), where=extra_where)

Expand Down
16 changes: 13 additions & 3 deletions posthog/hogql/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,15 @@ def visit_join_expr(self, node: ast.JoinExpr):
node_type = ast.TableAliasType(alias=table_alias, table_type=node_table_type)
else:
node_type = node_table_type

node = cast(ast.JoinExpr, clone_expr(node))
if node.constraint and node.constraint.constraint_type == "USING":
# visit USING constraint before adding the table to avoid ambiguous names
node.constraint = self.visit_join_constraint(node.constraint)

scope.tables[table_alias] = node_type

# :TRICKY: Make sure to clone and visit _all_ JoinExpr fields/nodes.
node = cast(ast.JoinExpr, clone_expr(node))
node.type = node_type
node.table = cast(ast.Field, clone_expr(node.table))
node.table.type = node_table_type
Expand All @@ -330,7 +335,8 @@ def visit_join_expr(self, node: ast.JoinExpr):
if is_global:
node.next_join.join_type = "GLOBAL JOIN"

node.constraint = self.visit(node.constraint)
if node.constraint and node.constraint.constraint_type == "ON":
node.constraint = self.visit_join_constraint(node.constraint)
node.sample = self.visit(node.sample)

# In case we had a function call table, and had to add an alias where none was present, mark it here
Expand All @@ -341,6 +347,9 @@ def visit_join_expr(self, node: ast.JoinExpr):

elif isinstance(node.table, ast.SelectQuery) or isinstance(node.table, ast.SelectUnionQuery):
node = cast(ast.JoinExpr, clone_expr(node))
if node.constraint and node.constraint.constraint_type == "USING":
# visit USING constraint before adding the table to avoid ambiguous names
node.constraint = self.visit_join_constraint(node.constraint)

node.table = super().visit(node.table)
if isinstance(node.table, ast.SelectQuery) and node.table.view_name is not None and node.alias is not None:
Expand All @@ -365,7 +374,8 @@ def visit_join_expr(self, node: ast.JoinExpr):

# :TRICKY: Make sure to clone and visit _all_ JoinExpr fields/nodes.
node.next_join = self.visit(node.next_join)
node.constraint = self.visit(node.constraint)
if node.constraint and node.constraint.constraint_type == "ON":
node.constraint = self.visit_join_constraint(node.constraint)
node.sample = self.visit(node.sample)

return node
Expand Down
28 changes: 22 additions & 6 deletions posthog/hogql/test/_test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ def test_select_from_join(self):
next_join=ast.JoinExpr(
join_type="JOIN",
table=ast.Field(chain=["events2"]),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1)),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1), constraint_type="ON"),
),
),
),
Expand All @@ -804,7 +804,7 @@ def test_select_from_join(self):
next_join=ast.JoinExpr(
join_type="LEFT OUTER JOIN",
table=ast.Field(chain=["events2"]),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1)),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1), constraint_type="ON"),
),
),
),
Expand All @@ -818,16 +818,30 @@ def test_select_from_join(self):
next_join=ast.JoinExpr(
join_type="LEFT OUTER JOIN",
table=ast.Field(chain=["events2"]),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1)),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1), constraint_type="ON"),
next_join=ast.JoinExpr(
join_type="RIGHT ANY JOIN",
table=ast.Field(chain=["events3"]),
constraint=ast.JoinConstraint(expr=ast.Constant(value=2)),
constraint=ast.JoinConstraint(expr=ast.Constant(value=2), constraint_type="ON"),
),
),
),
),
)
self.assertEqual(
self._select("select 1 from events JOIN events2 USING 1"),
ast.SelectQuery(
select=[ast.Constant(value=1)],
select_from=ast.JoinExpr(
table=ast.Field(chain=["events"]),
next_join=ast.JoinExpr(
join_type="JOIN",
table=ast.Field(chain=["events2"]),
constraint=ast.JoinConstraint(expr=ast.Constant(value=1), constraint_type="USING"),
),
),
),
)

def test_select_from_join_multiple(self):
node = self._select(
Expand Down Expand Up @@ -863,7 +877,8 @@ def test_select_from_join_multiple(self):
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=["pdi", "distinct_id"]),
right=ast.Field(chain=["e", "distinct_id"]),
)
),
constraint_type="ON",
),
next_join=ast.JoinExpr(
join_type="LEFT JOIN",
Expand All @@ -874,7 +889,8 @@ def test_select_from_join_multiple(self):
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=["p", "id"]),
right=ast.Field(chain=["pdi", "person_id"]),
)
),
constraint_type="ON",
),
),
),
Expand Down
13 changes: 13 additions & 0 deletions posthog/hogql/test/test_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,19 @@ def test_ctes_with_union_all(self):
"""),
)

def test_join_using(self):
node = self._select(
"WITH my_table AS (SELECT 1 AS a) SELECT q1.a FROM my_table AS q1 INNER JOIN my_table AS q2 USING a"
)
node = cast(ast.SelectQuery, resolve_types(node, self.context, dialect="clickhouse"))
constraint = cast(ast.SelectQuery, node).select_from.next_join.constraint
assert constraint.constraint_type == "USING"
assert cast(ast.Field, cast(ast.Alias, constraint.expr).expr).chain == ["a"]

node = self._select("SELECT q1.event FROM events AS q1 INNER JOIN events AS q2 USING event")
node = cast(ast.SelectQuery, resolve_types(node, self.context, dialect="clickhouse"))
assert cast(ast.SelectQuery, node).select_from.next_join.constraint.constraint_type == "USING"

@override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False)
@pytest.mark.usefixtures("unittest_snapshot")
def test_asterisk_expander_table(self):
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/test/test_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def test_everything_visitor(self):
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=["d"]),
right=ast.Field(chain=["e"]),
)
),
constraint_type="ON",
),
),
sample=ast.SampleExpr(
Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql/transforms/in_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ def get_dynamic_cohort_clause():
op=ast.CompareOperationOp.Eq,
left=ast.Constant(value=1),
right=ast.Constant(value=1),
)
),
constraint_type="ON",
),
)

Expand Down Expand Up @@ -377,7 +378,8 @@ def _add_join_for_cohort(
op=ast.CompareOperationOp.Eq,
left=ast.Constant(value=1),
right=ast.Constant(value=1),
)
),
constraint_type="ON",
),
)
new_join = cast(
Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,8 @@ def visit_window_frame_expr(self, node: ast.WindowFrameExpr):
frame_value=node.frame_value,
)

def visit_join_constraint(self, node: ast.JoinConstraint):
return ast.JoinConstraint(expr=self.visit(node.expr))
def visit_join_constraint(self, node: ast.JoinConstraint) -> ast.JoinConstraint:
return ast.JoinConstraint(expr=self.visit(node.expr), constraint_type=node.constraint_type)

def visit_hogqlx_tag(self, node: ast.HogQLXTag):
return ast.HogQLXTag(kind=node.kind, attributes=[self.visit(a) for a in node.attributes])
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql_queries/actors_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def source_table_join(self) -> ast.JoinExpr:
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=[self.strategy.origin, self.strategy.origin_id]),
right=ast.Field(chain=[source_alias, *source_id_chain]),
)
),
constraint_type="ON",
),
),
)
Expand Down