Skip to content

Commit

Permalink
[#22075] YSQL: Reenable rechecking on RowComparison filters
Browse files Browse the repository at this point in the history
Summary:
The previous method of binding a RowCompareExpression as a docdb scan bound is a bit inaccurate when considering NULL's. The semantics of RowCompareExpression that conflict with this can be found in the following examples:

```
SELECT (9, 8) >= (9, NULL); -> returns null
SELECT (10, 8) >= (9, NULL); -> returns true
SELECT (7, 8) >= (9, NULL); -> returns false
```

So if the column at which the last comparison is done in the RowCompareExpression has a NULL input, the whole result of the expression becomes NULL. There is no easy way to check this in DocDB without pushing down some rechecking expression. For that reason, we are simply reenabling rechecking for RowCompareExpressions in `yb_scan.c`.

Also, 5595277/D34007 introduced pushing down an `IS_NOT_NULL` operator for every column in an RC expression. This is only accurate to do on the left-most column. We correct that in this change as well.

Needs backports to: 2024.1, 2.20, 2.18
Jira: DB-10996

Test Plan: ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressIndex'

Reviewers: mtakahara, telgersma

Reviewed By: mtakahara

Subscribers: yql

Differential Revision: https://phorge.dev.yugabyte.com/D34917
  • Loading branch information
tanujnay112 committed May 13, 2024
1 parent c146c41 commit 718f58d
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 14 deletions.
5 changes: 3 additions & 2 deletions src/postgres/src/backend/access/yb_access/yb_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ YbBindRowComparisonKeys(YbScanDesc ybScan, YbScanPlan scan_plan,
}
}

bool needs_recheck = !can_pushdown;
bool needs_recheck = true;

if (can_pushdown)
{
Expand Down Expand Up @@ -1586,7 +1586,8 @@ YbBindRowComparisonKeys(YbScanDesc ybScan, YbScanPlan scan_plan,
{
int att_idx = YBAttnumToBmsIndex(
ybScan->relation, subkeys[subkey_index]->sk_attno);
is_not_null[att_idx] = true;
/* Set the first column in this RC to not null. */
is_not_null[att_idx] |= subkey_index == 0;

subkey_index++;
}
Expand Down
138 changes: 128 additions & 10 deletions src/postgres/src/test/regress/expected/yb_index_scan.out
Original file line number Diff line number Diff line change
Expand Up @@ -3570,12 +3570,14 @@ DROP TABLE pk_range_int_text;
CREATE TABLE null_test(a int, b int);
CREATE INDEX ON null_test(a asc, b asc);
INSERT INTO null_test VALUES (NULL, 9), (9, NULL), (9,8), (10,9);
EXPLAIN (COSTS OFF) SELECT * FROM null_test WHERE (a,b) >= (9, 8);
QUERY PLAN
------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test
EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) >= (9, 8);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=2 loops=1)
Index Cond: (ROW(a, b) >= ROW(9, 8))
(2 rows)
Rows Removed by Index Recheck: 1
Heap Fetches: 0
(4 rows)

SELECT * FROM null_test WHERE (a,b) >= (9, 8);
a | b
Expand All @@ -3584,19 +3586,135 @@ SELECT * FROM null_test WHERE (a,b) >= (9, 8);
10 | 9
(2 rows)

EXPLAIN (COSTS OFF) SELECT * FROM null_test WHERE (a,b) <= (9, 8);
QUERY PLAN
------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test
EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) <= (9, 8);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=1 loops=1)
Index Cond: (ROW(a, b) <= ROW(9, 8))
(2 rows)
Heap Fetches: 0
(3 rows)

SELECT * FROM null_test WHERE (a,b) <= (9, 8);
a | b
---+---
9 | 8
(1 row)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) <= (9, 8);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=1 loops=1)
Index Cond: (ROW(a, b) <= ROW(9, 8))
Heap Fetches: 0
(3 rows)

SELECT * FROM null_test WHERE (a,b) <= (9, 8);
a | b
---+---
9 | 8
(1 row)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) >= (8, 9);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=3 loops=1)
Index Cond: (ROW(a, b) >= ROW(8, 9))
Heap Fetches: 0
(3 rows)

SELECT * FROM null_test WHERE (a,b) >= (8, 9);
a | b
----+---
9 | 8
9 |
10 | 9
(3 rows)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE (a,b) >= (8, 9);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=3 loops=1)
Index Cond: (ROW(a, b) >= ROW(8, 9))
Heap Fetches: 0
(3 rows)

/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE (a,b) >= (8, 9);
a | b
----+---
9 | 8
9 |
10 | 9
(3 rows)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE a > 8 OR (a = 8 AND b >= NULL);
QUERY PLAN
------------------------------------------------------------------------------
Index Only Scan using null_test_a_b_idx on null_test (actual rows=3 loops=1)
Index Cond: (a > 8)
Heap Fetches: 0
(3 rows)

/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE a > 8 OR (a = 8 AND b >= NULL);
a | b
----+---
9 | 8
9 |
10 | 9
(3 rows)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON (t1.a, t1.b) >= (t2.a, t2.b);
QUERY PLAN
---------------------------------------------------------------------------------------
Nested Loop (actual rows=4 loops=1)
-> Index Only Scan using null_test_a_b_idx on null_test t1 (actual rows=4 loops=1)
Heap Fetches: 0
-> Index Only Scan using null_test_a_b_idx on null_test t2 (actual rows=1 loops=4)
Index Cond: (ROW(a, b) <= ROW(t1.a, t1.b))
Heap Fetches: 0
(6 rows)

/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON (t1.a, t1.b) >= (t2.a, t2.b);
a | b | a | b
----+---+----+---
9 | 8 | 9 | 8
10 | 9 | 9 | 8
10 | 9 | 9 |
10 | 9 | 10 | 9
(4 rows)

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON t1.a > t2.a OR (t1.a = t2.a AND t1.b >= t2.b);
QUERY PLAN
---------------------------------------------------------------------------------------
Nested Loop (actual rows=4 loops=1)
Join Filter: ((t1.a > t2.a) OR ((t1.a = t2.a) AND (t1.b >= t2.b)))
Rows Removed by Join Filter: 12
-> Index Only Scan using null_test_a_b_idx on null_test t1 (actual rows=4 loops=1)
Heap Fetches: 0
-> Index Only Scan using null_test_a_b_idx on null_test t2 (actual rows=4 loops=4)
Heap Fetches: 0
(7 rows)

/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON t1.a > t2.a OR (t1.a = t2.a AND t1.b >= t2.b);
a | b | a | b
----+---+----+---
9 | 8 | 9 | 8
10 | 9 | 9 | 8
10 | 9 | 9 |
10 | 9 | 10 | 9
(4 rows)

DROP TABLE null_test;
-- make sure row comparisons don't operate on hash keys yet
CREATE TABLE pk_hash_range_int (h int, r1 int, r2 int, r3 int, PRIMARY KEY(h hash, r1 asc, r2 asc, r3 asc));
Expand Down
41 changes: 39 additions & 2 deletions src/postgres/src/test/regress/sql/yb_index_scan.sql
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,47 @@ DROP TABLE pk_range_int_text;
CREATE TABLE null_test(a int, b int);
CREATE INDEX ON null_test(a asc, b asc);
INSERT INTO null_test VALUES (NULL, 9), (9, NULL), (9,8), (10,9);
EXPLAIN (COSTS OFF) SELECT * FROM null_test WHERE (a,b) >= (9, 8);
EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) >= (9, 8);
SELECT * FROM null_test WHERE (a,b) >= (9, 8);
EXPLAIN (COSTS OFF) SELECT * FROM null_test WHERE (a,b) <= (9, 8);

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) <= (9, 8);
SELECT * FROM null_test WHERE (a,b) <= (9, 8);

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) <= (9, 8);
SELECT * FROM null_test WHERE (a,b) <= (9, 8);

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT * FROM null_test WHERE (a,b) >= (8, 9);
SELECT * FROM null_test WHERE (a,b) >= (8, 9);

EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE (a,b) >= (8, 9);
/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE (a,b) >= (8, 9);


EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE a > 8 OR (a = 8 AND b >= NULL);

/*+ IndexOnlyScan(null_test) */
SELECT * FROM null_test WHERE a > 8 OR (a = 8 AND b >= NULL);


EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON (t1.a, t1.b) >= (t2.a, t2.b);

/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON (t1.a, t1.b) >= (t2.a, t2.b);


EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE)
/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON t1.a > t2.a OR (t1.a = t2.a AND t1.b >= t2.b);

/*+ Set(enable_material OFF) Leading((t1 t2)) IndexOnlyScan(t1) IndexOnlyScan(t2) */
SELECT * FROM null_test t1 JOIN null_test t2 ON t1.a > t2.a OR (t1.a = t2.a AND t1.b >= t2.b);
DROP TABLE null_test;

-- make sure row comparisons don't operate on hash keys yet
Expand Down

0 comments on commit 718f58d

Please sign in to comment.