Skip to content

Commit

Permalink
compaction: improve partition estimates for garbage collected sstables
Browse files Browse the repository at this point in the history
When a compaction strategy uses garbage collected sstables to track
expired tombstones, do not use complete partition estimates for them,
instead, use a fraction of it based on the droppable tombstone ratio
estimate.

Fixes scylladb#18283

Signed-off-by: Lakshmi Narayanan Sreethar <lakshmi.sreethar@scylladb.com>
  • Loading branch information
lkshminarayanan committed May 6, 2024
1 parent 5ca9a46 commit 020527a
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ class compaction {
uint64_t _compacting_data_file_size = 0;
api::timestamp_type _compacting_max_timestamp = api::min_timestamp;
uint64_t _estimated_partitions = 0;
double _estimated_droppable_tombstone_ratio = 0;
uint64_t _bloom_filter_checks = 0;
db::replay_position _rp;
encoding_stats_collector _stats_collector;
Expand Down Expand Up @@ -609,7 +610,8 @@ class compaction {
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
cfg.run_identifier = gc_run;
cfg.monitor = monitor.get();
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats());
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
auto writer = sst->get_writer(*schema(), estimated_partitions, cfg, get_encoding_stats());
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
}

Expand Down Expand Up @@ -753,6 +755,7 @@ class compaction {
// for a better estimate for the number of partitions in the merged
// sstable than just adding up the lengths of individual sstables.
_estimated_partitions += sst->get_estimated_key_count();
_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_clock::now(), _table_s.get_tombstone_gc_state(), _schema);
_compacting_data_file_size += sst->ondisk_data_size();
_compacting_max_timestamp = std::max(_compacting_max_timestamp, sst->get_stats_metadata().max_timestamp);
if (sst->originated_on_this_node().value_or(false) && sst_stats.position.shard_id() == this_shard_id()) {
Expand All @@ -764,6 +767,7 @@ class compaction {
log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
_sstables.size() - ssts->size(), _sstables.size());
}
_estimated_droppable_tombstone_ratio /= ssts->size();

_compacting = std::move(ssts);

Expand Down

0 comments on commit 020527a

Please sign in to comment.