Account for filters in DecompressChunk row estimates #6563

Open · wants to merge 11 commits into main
9 changes: 5 additions & 4 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
@@ -321,15 +321,16 @@ build_compressioninfo(PlannerInfo *root, Hypertable *ht, RelOptInfo *chunk_rel)
  * we put cost of 1 tuple of compressed_scan as startup cost
  */
 static void
-cost_decompress_chunk(Path *path, Path *compressed_path)
+cost_decompress_chunk(PlannerInfo *root, Path *path, Path *compressed_path)
 {
 	/* startup_cost is cost before fetching first tuple */
 	if (compressed_path->rows > 0)
 		path->startup_cost = compressed_path->total_cost / compressed_path->rows;
 
 	/* total_cost is cost for fetching all tuples */
 	path->total_cost = compressed_path->total_cost + path->rows * cpu_tuple_cost;
-	path->rows = compressed_path->rows * DECOMPRESS_CHUNK_BATCH_SIZE;
+	path->rows = compressed_path->rows * DECOMPRESS_CHUNK_BATCH_SIZE *
+				 clauselist_selectivity(root, path->parent->baserestrictinfo, 0, JOIN_INNER, NULL);
 }
 
 /* Smoothstep function S1 (the h01 cubic Hermite spline). */
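
Note: to make the effect of this change concrete, here is a minimal standalone sketch of the arithmetic, not TimescaleDB source. It assumes DECOMPRESS_CHUNK_BATCH_SIZE is 1000 (its value in the TimescaleDB sources) and uses an illustrative selectivity of 0.01 in place of the clauselist_selectivity() result over path->parent->baserestrictinfo.

#include <stdio.h>

/*
 * Hypothetical illustration of the new DecompressChunk row estimate.
 * DECOMPRESS_CHUNK_BATCH_SIZE and the selectivity value are assumptions
 * for the example, not values taken from a running planner.
 */
#define DECOMPRESS_CHUNK_BATCH_SIZE 1000

int
main(void)
{
	double compressed_rows = 10;  /* batches returned by the compressed scan */
	double selectivity = 0.01;    /* assumed combined qual selectivity */

	/* Before this patch: every batch is assumed to decompress in full. */
	double old_estimate = compressed_rows * DECOMPRESS_CHUNK_BATCH_SIZE;

	/* After this patch: the estimate is scaled by the qual selectivity. */
	double new_estimate = old_estimate * selectivity;

	printf("old: %.0f rows, new: %.0f rows\n", old_estimate, new_estimate);
	return 0;
}

With these inputs the estimate drops from 10000 rows to 100 rows. A correction of that size is what allows the planner to prefer plans such as the parameterized index scans visible in the updated expected test output below.
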
@@ -899,7 +900,7 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 											   work_mem,
 											   -1);
 
-		cost_decompress_chunk(&dcpath->custom_path.path, &sort_path);
+		cost_decompress_chunk(root, &dcpath->custom_path.path, &sort_path);
 	}
 	/*
 	 * if chunk is partially compressed don't add this now but add an append path later
@@ -1749,7 +1750,7 @@ decompress_chunk_path_create(PlannerInfo *root, CompressionInfo *info, int paral
 	path->custom_path.custom_paths = list_make1(compressed_path);
 	path->reverse = false;
 	path->compressed_pathkeys = NIL;
-	cost_decompress_chunk(&path->custom_path.path, compressed_path);
+	cost_decompress_chunk(root, &path->custom_path.path, compressed_path);
 
 	return path;
 }
12 changes: 12 additions & 0 deletions tsl/src/nodes/decompress_chunk/qual_pushdown.c
@@ -87,6 +87,18 @@ pushdown_quals(PlannerInfo *root, CompressionSettings *settings, RelOptInfo *chu
 		{
 			decompress_clauses = lappend(decompress_clauses, ri);
 		}
+
+		if (context.needs_recheck)
+		{
+			/*
+			 * If we managed to push down the comparison of orderby column
+			 * to the compressed scan, most matched batches are likely to
+			 * match entirely, so the selectivity of the recheck will be
+			 * close to 1.
+			 */
+			ri->norm_selec = 1;
+			Assert(context.can_pushdown);
+		}
 	}
 	chunk_rel->baserestrictinfo = decompress_clauses;
 }
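
Note on why pre-setting norm_selec works: PostgreSQL caches the per-clause inner-join selectivity on each RestrictInfo in norm_selec (-1 meaning "not computed yet"), and clause_selectivity() reuses the cached value instead of recomputing it, so a pre-set value of 1 tells the planner the recheck qual filters out essentially nothing. The sketch below is a hypothetical, self-contained illustration of that caching behavior, not PostgreSQL source; the struct and function names are stand-ins.

#include <stdio.h>

/* Stand-in for PostgreSQL's RestrictInfo selectivity cache. */
typedef struct RestrictInfoSketch
{
	double norm_selec; /* cached selectivity; -1 if not yet computed */
} RestrictInfoSketch;

static double
clause_selectivity_sketch(RestrictInfoSketch *ri)
{
	if (ri->norm_selec >= 0)
		return ri->norm_selec; /* cached value wins, as in the real planner */
	/* ... otherwise estimate from statistics and cache the result ... */
	ri->norm_selec = 0.005; /* placeholder for a computed estimate */
	return ri->norm_selec;
}

int
main(void)
{
	RestrictInfoSketch recheck = { .norm_selec = 1 }; /* as set by this patch */
	RestrictInfoSketch other = { .norm_selec = -1 };  /* not yet computed */

	printf("recheck clause selectivity: %g\n", clause_selectivity_sketch(&recheck));
	printf("ordinary clause selectivity: %g\n", clause_selectivity_sketch(&other));
	return 0;
}
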
915 changes: 412 additions & 503 deletions tsl/test/expected/merge_append_partially_compressed-13.out
917 changes: 413 additions & 504 deletions tsl/test/expected/merge_append_partially_compressed-14.out
925 changes: 416 additions & 509 deletions tsl/test/expected/merge_append_partially_compressed-15.out
925 changes: 416 additions & 509 deletions tsl/test/expected/merge_append_partially_compressed-16.out
(Large diffs are not rendered by default.)

@@ -699,20 +699,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -699,20 +699,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -701,20 +701,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (nodetime.node = met.device_id)
+   Join Filter: (("*VALUES*".column2 = met.device_id_peer) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.
@@ -701,20 +701,19 @@ ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
                                                                                        QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop (actual rows=1 loops=1)
-   Join Filter: (met.device_id = nodetime.node)
+   Join Filter: ((met.device_id_peer = "*VALUES*".column2) AND ("*VALUES*".column3 = met.v0))
    ->  Nested Loop (actual rows=1 loops=1)
          Join Filter: (nodetime.node = "*VALUES*".column1)
+         Rows Removed by Join Filter: 1
          ->  Seq Scan on nodetime (actual rows=1 loops=1)
          ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)
    ->  Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
-         Filter: ("*VALUES*".column3 = v0)
-         Rows Removed by Filter: 47
          Vectorized Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
          Rows Removed by Filter: 47
          ->  Index Scan using compress_hyper_2_9_chunk_device_id_device_id_peer__ts_meta__idx on compress_hyper_2_9_chunk (actual rows=1 loops=1)
-               Index Cond: ((device_id = "*VALUES*".column1) AND (device_id_peer = "*VALUES*".column2))
+               Index Cond: (device_id = nodetime.node)
                Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone))
-(14 rows)
+(13 rows)
 
 -- filter on compressed attr (v0) with seqscan enabled and indexscan
 -- disabled. filters on compressed attr should be above the seq scan.