Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: enable async manual compactions #57107

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/common/options/global.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -5030,6 +5030,13 @@ options:
This setting is used only when OSD is doing ``--mkfs``.
Next runs of OSD retrieve sharding from disk.
default: m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L=min_write_buffer_number_to_merge=32 P=min_write_buffer_number_to_merge=32
- name: bluestore_async_db_compaction
aclamk marked this conversation as resolved.
Show resolved Hide resolved
type: bool
level: dev
desc: Perform DB compaction requests asynchronously
long_desc: 'Whether DB compactions triggered either through the admin socket or
by the OSD initialization procedure on start are performed asynchronously.'
default: true
- name: bluestore_qfsck_on_mount
type: bool
level: dev
Expand Down
13 changes: 11 additions & 2 deletions src/kv/RocksDBStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,9 @@ int RocksDBStore::do_open(ostream &out,
plb.add_time_avg(l_rocksdb_submit_latency, "submit_latency", "Submit Latency");
plb.add_time_avg(l_rocksdb_submit_sync_latency, "submit_sync_latency", "Submit Sync Latency");
plb.add_u64_counter(l_rocksdb_compact, "compact", "Compactions");
plb.add_u64_counter(l_rocksdb_compact_range, "compact_range", "Compactions by range");
plb.add_u64_counter(l_rocksdb_compact_running, "compact_running", "Running compactions");
plb.add_u64_counter(l_rocksdb_compact_completed, "compact_completed", "Completed compactions");
plb.add_time(l_rocksdb_compact_lasted, "compact_lasted", "Last completed compaction duration");
plb.add_u64_counter(l_rocksdb_compact_queue_merge, "compact_queue_merge", "Mergings of ranges in compaction queue");
plb.add_u64(l_rocksdb_compact_queue_len, "compact_queue_len", "Length of compaction queue");
plb.add_time_avg(l_rocksdb_write_wal_time, "rocksdb_write_wal_time", "Rocksdb write wal time");
Expand Down Expand Up @@ -1988,6 +1990,7 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)

void RocksDBStore::compact()
{
dout(2) << __func__ << " starting" << dendl;
logger->inc(l_rocksdb_compact);
rocksdb::CompactRangeOptions options;
db->CompactRange(options, default_cf, nullptr, nullptr);
Expand All @@ -1999,6 +2002,7 @@ void RocksDBStore::compact()
nullptr, nullptr);
}
}
dout(2) << __func__ << " completed" << dendl;
}

void RocksDBStore::compact_thread_entry()
Expand All @@ -2011,12 +2015,17 @@ void RocksDBStore::compact_thread_entry()
compact_queue.pop_front();
logger->set(l_rocksdb_compact_queue_len, compact_queue.size());
l.unlock();
logger->inc(l_rocksdb_compact_range);
logger->inc(l_rocksdb_compact_running);
auto start = ceph_clock_now();
if (range.first.empty() && range.second.empty()) {
compact();
} else {
compact_range(range.first, range.second);
}
auto lat = ceph_clock_now() - start;
logger->dec(l_rocksdb_compact_running);
logger->inc(l_rocksdb_compact_completed);
logger->tset(l_rocksdb_compact_lasted, lat);
l.lock();
continue;
}
Expand Down
4 changes: 3 additions & 1 deletion src/kv/RocksDBStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ enum {
l_rocksdb_submit_latency,
l_rocksdb_submit_sync_latency,
l_rocksdb_compact,
l_rocksdb_compact_range,
l_rocksdb_compact_running,
l_rocksdb_compact_completed,
l_rocksdb_compact_lasted,
l_rocksdb_compact_queue_merge,
l_rocksdb_compact_queue_len,
l_rocksdb_write_wal_time,
Expand Down
2 changes: 1 addition & 1 deletion src/os/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ class ObjectStore {
virtual void inject_data_error(const ghobject_t &oid) {}
virtual void inject_mdata_error(const ghobject_t &oid) {}

virtual void compact() {}
virtual int compact() { return -ENOTSUP; }
virtual bool has_builtin_csum() const {
return false;
}
Expand Down
16 changes: 16 additions & 0 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11221,6 +11221,22 @@ void BlueStore::inject_bluefs_file(std::string_view dir, std::string_view name,
bluefs->close_writer(p_handle);
}

// Request a compaction of the store's DB.
//
// The mode is selected by the bluestore_async_db_compaction option:
//  - set:   the request is handed to db->compact_async() and -EINPROGRESS
//           is returned to the caller;
//  - unset: db->compact() is called and 0 is returned after that call
//           returns.
int BlueStore::compact()
{
  ceph_assert(db);
  const bool run_async =
    cct->_conf.get_val<bool>("bluestore_async_db_compaction");
  if (!run_async) {
    dout(1) << __func__ << " starting sync.." << dendl;
    db->compact();
    dout(1) << __func__ << " finished." << dendl;
    return 0;
  }
  dout(1) << __func__ << " starting async.." << dendl;
  db->compact_async();
  // Tell the caller the request was accepted but has not been waited for.
  return -EINPROGRESS;
}

void BlueStore::collect_metadata(map<string,string> *pm)
{
dout(10) << __func__ << dendl;
Expand Down
5 changes: 1 addition & 4 deletions src/os/bluestore/BlueStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -3379,10 +3379,7 @@ class BlueStore : public ObjectStore,
std::string_view name,
size_t new_size);

void compact() override {
ceph_assert(db);
db->compact();
}
int compact() override;
bool has_builtin_csum() const override {
return true;
}
Expand Down
3 changes: 2 additions & 1 deletion src/os/kstore/KStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,9 +580,10 @@ class KStore : public ObjectStore {
TrackedOpRef op = TrackedOpRef(),
ThreadPool::TPHandle *handle = NULL) override;

void compact () override {
int compact () override {
ceph_assert(db);
db->compact();
return 0;
}

private:
Expand Down
27 changes: 17 additions & 10 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2976,15 +2976,22 @@ will start to track new ops received afterwards.";
} else if (prefix == "compact") {
dout(1) << "triggering manual compaction" << dendl;
auto start = ceph::coarse_mono_clock::now();
store->compact();
auto end = ceph::coarse_mono_clock::now();
double duration = std::chrono::duration<double>(end-start).count();
dout(1) << "finished manual compaction in "
<< duration
<< " seconds" << dendl;
f->open_object_section("compact_result");
f->dump_float("elapsed_time", duration);
f->close_section();
int r = store->compact();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better if we had control here over whether to use compact or compact async?

if (r == 0) {
auto end = ceph::coarse_mono_clock::now();
double duration = std::chrono::duration<double>(end-start).count();

dout(1) << "finished manual compaction in "
<< duration
<< " seconds" << dendl;
f->open_object_section("compact_result");
f->dump_float("elapsed_time", duration);
f->close_section();
} else if ( r == -EINPROGRESS) {
dout(1) << "manual compaction is being executed asynchronously" << dendl;
} else {
derr << "error starting manual compaction:" << cpp_strerror(r) << dendl;
}
} else if (prefix == "get_mapped_pools") {
f->open_array_section("mapped_pools");
set<int64_t> poollist = get_mapped_pools();
Expand Down Expand Up @@ -3900,7 +3907,7 @@ int OSD::init()
dout(2) << "superblock: I am osd." << superblock.whoami << dendl;

if (cct->_conf.get_val<bool>("osd_compact_on_start")) {
dout(2) << "compacting object store's omap" << dendl;
dout(2) << "compacting object store's DB" << dendl;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, makes more sense.

store->compact();
}

Expand Down