Account for filters in DecompressChunk row estimates #6563

Open · wants to merge 11 commits into main
9 changes: 5 additions & 4 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
@@ -321,15 +321,16 @@ build_compressioninfo(PlannerInfo *root, Hypertable *ht, RelOptInfo *chunk_rel)
  * we put cost of 1 tuple of compressed_scan as startup cost
  */
 static void
-cost_decompress_chunk(Path *path, Path *compressed_path)
+cost_decompress_chunk(PlannerInfo *root, Path *path, Path *compressed_path)
 {
 	/* startup_cost is cost before fetching first tuple */
 	if (compressed_path->rows > 0)
 		path->startup_cost = compressed_path->total_cost / compressed_path->rows;
 
 	/* total_cost is cost for fetching all tuples */
 	path->total_cost = compressed_path->total_cost + path->rows * cpu_tuple_cost;
-	path->rows = compressed_path->rows * DECOMPRESS_CHUNK_BATCH_SIZE;
+	path->rows = compressed_path->rows * DECOMPRESS_CHUNK_BATCH_SIZE *
+				 clauselist_selectivity(root, path->parent->baserestrictinfo, 0, JOIN_INNER, NULL);
 }
 
 /* Smoothstep function S1 (the h01 cubic Hermite spline). */
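
Note: to make the effect of this change concrete, here is a minimal standalone sketch of the arithmetic, not TimescaleDB source. It assumes DECOMPRESS_CHUNK_BATCH_SIZE is 1000 (its value in the TimescaleDB sources) and uses an illustrative selectivity of 0.01 in place of the clauselist_selectivity() result over path->parent->baserestrictinfo.

#include <stdio.h>

/*
 * Hypothetical illustration of the new DecompressChunk row estimate.
 * DECOMPRESS_CHUNK_BATCH_SIZE and the selectivity value are assumptions
 * for the example, not values taken from a running planner.
 */
#define DECOMPRESS_CHUNK_BATCH_SIZE 1000

int
main(void)
{
	double compressed_rows = 10;  /* batches returned by the compressed scan */
	double selectivity = 0.01;    /* assumed combined qual selectivity */

	/* Before this patch: every batch is assumed to decompress in full. */
	double old_estimate = compressed_rows * DECOMPRESS_CHUNK_BATCH_SIZE;

	/* After this patch: the estimate is scaled by the qual selectivity. */
	double new_estimate = old_estimate * selectivity;

	printf("old: %.0f rows, new: %.0f rows\n", old_estimate, new_estimate);
	return 0;
}

With these inputs the estimate drops from 10000 rows to 100 rows. A correction of that size is what allows the planner to prefer plans such as the parameterized index scans visible in the updated expected test output below.
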
@@ -899,7 +900,7 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 											   work_mem,
 											   -1);
 
-		cost_decompress_chunk(&dcpath->custom_path.path, &sort_path);
+		cost_decompress_chunk(root, &dcpath->custom_path.path, &sort_path);
 	}
 	/*
 	 * if chunk is partially compressed don't add this now but add an append path later
@@ -1749,7 +1750,7 @@ decompress_chunk_path_create(PlannerInfo *root, CompressionInfo *info, int paral
 	path->custom_path.custom_paths = list_make1(compressed_path);
 	path->reverse = false;
 	path->compressed_pathkeys = NIL;
-	cost_decompress_chunk(&path->custom_path.path, compressed_path);
+	cost_decompress_chunk(root, &path->custom_path.path, compressed_path);
 
 	return path;
 }
12 changes: 12 additions & 0 deletions tsl/src/nodes/decompress_chunk/qual_pushdown.c
@@ -87,6 +87,18 @@ pushdown_quals(PlannerInfo *root, CompressionSettings *settings, RelOptInfo *chu
 		{
 			decompress_clauses = lappend(decompress_clauses, ri);
 		}
+
+		if (context.needs_recheck)
+		{
+			/*
+			 * If we managed to push down the comparison of orderby column
+			 * to the compressed scan, most matched batches are likely to
+			 * match entirely, so the selectivity of the recheck will be
+			 * close to 1.
+			 */
+			ri->norm_selec = 1;
+			Assert(context.can_pushdown);
+		}
 	}
 	chunk_rel->baserestrictinfo = decompress_clauses;
 }
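
Note on why pre-setting norm_selec works: PostgreSQL caches the per-clause inner-join selectivity on each RestrictInfo in norm_selec (-1 meaning "not computed yet"), and clause_selectivity() reuses the cached value instead of recomputing it, so a pre-set value of 1 tells the planner the recheck qual filters out essentially nothing. The sketch below is a hypothetical, self-contained illustration of that caching behavior, not PostgreSQL source; the struct and function names are stand-ins.

#include <stdio.h>

/* Stand-in for PostgreSQL's RestrictInfo selectivity cache. */
typedef struct RestrictInfoSketch
{
	double norm_selec; /* cached selectivity; -1 if not yet computed */
} RestrictInfoSketch;

static double
clause_selectivity_sketch(RestrictInfoSketch *ri)
{
	if (ri->norm_selec >= 0)
		return ri->norm_selec; /* cached value wins, as in the real planner */
	/* ... otherwise estimate from statistics and cache the result ... */
	ri->norm_selec = 0.005; /* placeholder for a computed estimate */
	return ri->norm_selec;
}

int
main(void)
{
	RestrictInfoSketch recheck = { .norm_selec = 1 }; /* as set by this patch */
	RestrictInfoSketch other = { .norm_selec = -1 };  /* not yet computed */

	printf("recheck clause selectivity: %g\n", clause_selectivity_sketch(&recheck));
	printf("ordinary clause selectivity: %g\n", clause_selectivity_sketch(&other));
	return 0;
}
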
915 changes: 412 additions & 503 deletions tsl/test/expected/merge_append_partially_compressed-13.out
917 changes: 413 additions & 504 deletions tsl/test/expected/merge_append_partially_compressed-14.out
925 changes: 416 additions & 509 deletions tsl/test/expected/merge_append_partially_compressed-15.out
925 changes: 416 additions & 509 deletions tsl/test/expected/merge_append_partially_compressed-16.out
(Large diffs are not rendered by default.)

@@ -699,20 +699,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -699,20 +699,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -701,20 +701,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -701,20 +701,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (met.device_id = nodetime.node)
+   Join Filter: ((met.device_id_peer = "*VALUES*".column2) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.