Skip to content

Commit

Permalink
compaction: improve partition estimates for garbage collected sstables
Browse files Browse the repository at this point in the history
When a compaction strategy uses garbage collected sstables to track
expired tombstones, do not use complete partition estimates for them,
instead, use a fraction of it based on the droppable tombstone ratio
estimate.

Fixes scylladb#18283

Signed-off-by: Lakshmi Narayanan Sreethar <lakshmi.sreethar@scylladb.com>

Closes scylladb#18465

(cherry picked from commit d39adf6)
  • Loading branch information
lkshminarayanan committed May 13, 2024
1 parent b68c06c commit 2286fc7
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ class compaction {
uint64_t _start_size = 0;
uint64_t _end_size = 0;
uint64_t _estimated_partitions = 0;
double _estimated_droppable_tombstone_ratio = 0;
uint64_t _bloom_filter_checks = 0;
db::replay_position _rp;
encoding_stats_collector _stats_collector;
Expand Down Expand Up @@ -584,6 +585,7 @@ class compaction {
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
cfg.run_identifier = gc_run;
cfg.monitor = monitor.get();
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats(), priority);
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
}
Expand Down Expand Up @@ -654,6 +656,7 @@ class compaction {
auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
min_max_tracker<api::timestamp_type> timestamp_tracker;

double sum_of_estimated_droppable_tombstone_ratio = 0;
_input_sstable_generations.reserve(_sstables.size());
for (auto& sst : _sstables) {
co_await coroutine::maybe_yield();
Expand Down Expand Up @@ -688,12 +691,16 @@ class compaction {
// this is kind of ok, esp. since we will hopefully not be trying to recover based on
// compacted sstables anyway (CL should be clean by then).
_rp = std::max(_rp, sst_stats.position);
auto gc_before = sst->get_gc_before_for_drop_estimation(gc_clock::now(), _table_s.get_tombstone_gc_state());
sum_of_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_before);
}
log_info("{} {}", report_start_desc(), formatted_msg);
if (ssts->all()->size() < _sstables.size()) {
log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
_sstables.size() - ssts->all()->size(), _sstables.size());
}
// _estimated_droppable_tombstone_ratio could exceed 1.0 in certain cases, so limit it to 1.0.
_estimated_droppable_tombstone_ratio = std::min(1.0, sum_of_estimated_droppable_tombstone_ratio / ssts->all()->size());

_compacting = std::move(ssts);

Expand Down

0 comments on commit 2286fc7

Please sign in to comment.