Skip to content

Commit

Permalink
compaction: improve partition estimates when using garbage collected …
Browse files Browse the repository at this point in the history
…sstables

When a compaction strategy uses garbage collected sstables to track
expired tombstones, do not pass the full partition estimate to both the
regular output sstable writer and the temporary (garbage collected)
sstable writer. Instead, split the partition estimate between the two
based on the estimated droppable tombstone ratio.

Fixes scylladb#18283

Signed-off-by: Lakshmi Narayanan Sreethar <lakshmi.sreethar@scylladb.com>
  • Loading branch information
lkshminarayanan committed Apr 29, 2024
1 parent af0c0ee commit 8905116
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,15 @@ class compaction {
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions, _schema));
}

// Returns the arithmetic mean of estimate_droppable_tombstone_ratio() over
// every sstable in _sstables, with all estimates evaluated at the same
// gc_clock timestamp taken once at the start of the call.
//
// Returns 0 when _sstables is empty. Without this guard the division below
// would be 0.0 / 0 and produce NaN; callers feed a value derived from this
// result through ceil() and convert it to uint64_t, and converting NaN to an
// integer type is undefined behaviour.
double average_estimate_of_droppable_tombstone_ratio() const {
    const auto sstable_count = _sstables.size();
    if (sstable_count == 0) {
        return 0;
    }
    double total_estimate = 0;
    // Use a single timestamp so that every sstable is judged against the same point in time.
    const auto compaction_time = gc_clock::now();
    for (const auto& sst : _sstables) {
        total_estimate += sst->estimate_droppable_tombstone_ratio(compaction_time, _table_s.get_tombstone_gc_state(), _schema);
    }
    return total_estimate / sstable_count;
}

void setup_new_sstable(shared_sstable& sst) {
_all_new_sstables.push_back(sst);
_new_partial_sstables.insert(sst);
Expand Down Expand Up @@ -609,7 +618,8 @@ class compaction {
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
cfg.run_identifier = gc_run;
cfg.monitor = monitor.get();
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats());
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * average_estimate_of_droppable_tombstone_ratio())));
auto writer = sst->get_writer(*schema(), estimated_partitions, cfg, get_encoding_stats());
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
}

Expand Down Expand Up @@ -1117,7 +1127,11 @@ class regular_compaction : public compaction {
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
sstable_writer_config cfg = make_sstable_writer_config(_type);
cfg.monitor = monitor.get();
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats()), sst};
// adjust the partitions based on the droppable tombstone estimate if the garbage collected sstable writer is enabled
uint64_t estimated_partitions = enable_garbage_collected_sstable_writer()
? std::max(1UL, uint64_t(ceil(partitions_per_sstable() * (1 - average_estimate_of_droppable_tombstone_ratio()))))
: partitions_per_sstable();
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, estimated_partitions, cfg, get_encoding_stats()), sst};
}

virtual void stop_sstable_writer(compaction_writer* writer) override {
Expand Down

0 comments on commit 8905116

Please sign in to comment.