[#21572] docdb: Clone hidden tables from active rocksdb
Summary:
When cloning to time `t`, some of the tablets which were running as of `t` might have been deleted before the snapshot covering time `t` was taken.
Since we do not snapshot hidden tables, the SST files for these tablets must be hardlinked from each tablet's active rocksdb (because the tablets are hidden, the data in the active rocksdb is identical to the data as of `t`). Note that it is not sufficient to clone from the active rocksdb for every tablet that is hidden when the clone operation starts: if a tablet **was** included in the covering snapshot, its active rocksdb is only guaranteed to contain data from after the covering snapshot time.
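
As a rough illustration of that choice of SST source, here is a minimal sketch; the helper and parameter names are hypothetical stand-ins, not YugabyteDB APIs — only the decision logic follows the description above:

```cpp
#include <filesystem>

// Hypothetical helper illustrating where a cloned tablet's SST files are hardlinked
// from. Names here are illustrative; only the decision logic mirrors the summary.
std::filesystem::path ChooseSstSourceDir(
    const std::filesystem::path& active_rocksdb_dir,
    const std::filesystem::path& covering_snapshot_dir,
    bool tablet_was_in_covering_snapshot) {
  if (!tablet_was_in_covering_snapshot) {
    // The tablet was hidden before the covering snapshot was taken, so its active
    // rocksdb already matches the data as of the clone time `t`.
    return active_rocksdb_dir;
  }
  // Otherwise the active rocksdb may only contain data from after the covering
  // snapshot time, so hardlink from the covering snapshot's files instead.
  return covering_snapshot_dir;
}
```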

This diff adds the above functionality by keeping track of which tablets were not part of the covering snapshot, and setting the new field `clone_from_active_rocksdb` in the clone requests for those tablets.
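
A minimal sketch of that bookkeeping follows; the request type and every name other than the `clone_from_active_rocksdb` field are hypothetical stand-ins for illustration:

```cpp
#include <string>
#include <unordered_set>
#include <vector>

using TabletId = std::string;

// Hypothetical stand-in for a per-tablet clone request; only the
// clone_from_active_rocksdb field comes from this diff.
struct CloneTabletRequest {
  TabletId source_tablet_id;
  bool clone_from_active_rocksdb = false;
};

// Tablets that were RUNNING as of the restore time but were HIDDEN before the
// covering snapshot was taken have no snapshotted data, so their SST files must
// be hardlinked from the source tablet's active rocksdb.
void MarkTabletsToCloneFromActiveRocksDb(
    const std::unordered_set<TabletId>& not_snapshotted_tablets,
    std::vector<CloneTabletRequest>* clone_requests) {
  for (auto& req : *clone_requests) {
    if (not_snapshotted_tablets.count(req.source_tablet_id) > 0) {
      req.clone_from_active_rocksdb = true;
    }
  }
}
```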

**Upgrade/Rollback safety:**
The change is to an API (CloneTablet) which should not yet be used in any production system.

Fixes #21572.
Jira: DB-10456

Test Plan: `./yb_build.sh --cxx_test snapshot-schedule-test --gtest_filter=CloneFromScheduleTest.CloneAfterDrop`

Reviewers: mhaddad

Reviewed By: mhaddad

Subscribers: ybase, bogdan

Differential Revision: https://phorge.dev.yugabyte.com/D33922
SrivastavaAnubhav committed Apr 26, 2024
1 parent 7cb173c commit c295c10
Showing 11 changed files with 240 additions and 113 deletions.
27 changes: 27 additions & 0 deletions src/yb/client/snapshot-schedule-test.cc
@@ -447,6 +447,33 @@ TEST_F(CloneFromScheduleTest, CloneWithNoSchedule) {
  ASSERT_STR_CONTAINS(status.message().ToBuffer(), "Could not find snapshot schedule");
}

TEST_F(CloneFromScheduleTest, CloneAfterDrop) {
  auto schedule_id = ASSERT_RESULT(
      snapshot_util_->CreateSchedule(table_, kTableName.namespace_type(),
                                     kTableName.namespace_name()));
  ASSERT_OK(snapshot_util_->WaitScheduleSnapshot(schedule_id));

  ASSERT_NO_FATALS(WriteData(WriteOpType::INSERT, 0 /* transaction */));
  auto row_count = CountTableRows(table_);
  auto ht = cluster_->mini_master()->master()->clock()->Now();

  ASSERT_OK(client_->DeleteTable(kTableName));

  master::CloneNamespaceRequestPB req;
  master::NamespaceIdentifierPB source_namespace;
  source_namespace.set_name(kTableName.namespace_name());
  source_namespace.set_database_type(YQLDatabase::YQL_DATABASE_CQL);
  *req.mutable_source_namespace() = source_namespace;
  req.set_restore_ht(ht.ToUint64());
  req.set_target_namespace_name("clone" /* target_namespace_name */);
  ASSERT_OK(CloneAndWait(req));

  YBTableName clone(YQL_DATABASE_CQL, "clone", kTableName.table_name());
  TableHandle clone_handle;
  ASSERT_OK(clone_handle.Open(clone, client_.get()));
  ASSERT_EQ(CountTableRows(clone_handle), row_count);
}

TEST_F(SnapshotScheduleTest, RemoveNewTablets) {
  const auto kInterval = 5s * kTimeMultiplier;
  const auto kRetention = kInterval * 2;
4 changes: 2 additions & 2 deletions src/yb/integration-tests/minicluster-snapshot-test.cc
@@ -469,7 +469,7 @@ TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTime) {
  LOG(INFO) << Format(
      "Exporting snapshot from snapshot schedule: $0, Hybrid time = $1", schedule_id, time);
  auto deadline = CoarseMonoClock::Now() + timeout;
  master::SnapshotInfoPB snapshot_info_as_of_time = ASSERT_RESULT(
  auto [snapshot_info_as_of_time, not_snapshotted_tablets] = ASSERT_RESULT(
      mini_cluster()->mini_master()->catalog_manager_impl().GenerateSnapshotInfoFromSchedule(
          schedule_id, HybridTime::FromMicros(static_cast<uint64>(time.ToInt64())), deadline));
  // 6.
@@ -508,7 +508,7 @@ TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTimeWithHiddenTables) {
  LOG(INFO) << Format(
      "Exporting snapshot from snapshot schedule: $0, Hybrid time = $1", schedule_id, time);
  auto deadline = CoarseMonoClock::Now() + timeout;
  master::SnapshotInfoPB snapshot_info_as_of_time = ASSERT_RESULT(
  auto [snapshot_info_as_of_time, not_snapshotted_tablets] = ASSERT_RESULT(
      mini_cluster()->mini_master()->catalog_manager_impl().GenerateSnapshotInfoFromSchedule(
          schedule_id, HybridTime::FromMicros(static_cast<uint64>(time.ToInt64())), deadline));
  // 6. Assert the output of 5 and 3 are the same.
22 changes: 14 additions & 8 deletions src/yb/master/catalog_manager.h
@@ -1213,16 +1213,19 @@ class CatalogManager : public tserver::TabletPeerLookupIf,
      const ListSnapshotRestorationsRequestPB* req,
      ListSnapshotRestorationsResponsePB* resp) override;

  // Generate the snapshot info as of export_time from the provided snapshot schedule.
  Result<SnapshotInfoPB> GenerateSnapshotInfoFromSchedule(
      const SnapshotScheduleId& snapshot_schedule_id, HybridTime export_time,
      CoarseTimePoint deadline) override;
  Result<SnapshotInfoPB> GetSnapshotInfoForBackup(const TxnSnapshotId& snapshot_id);

  // Generate the SnapshotInfoPB as of read_time from the provided snapshot schedule, and return
  // the set of tablets that were RUNNING as of read_time but were HIDDEN before the actual snapshot
  // was taken.
  // The SnapshotInfoPB generated by export snapshot as of time should be identical to the
  // SnapshotInfoPB generated by the normal export_snapshot (even ordering of tables/tablets)
  Result<SnapshotInfoPB> GenerateSnapshotInfoPbAsOfTime(
      const TxnSnapshotId& snapshot_id, HybridTime read_time, const docdb::DocDB& doc_db,
      std::reference_wrapper<const ScopedRWOperation> db_pending_op);
  // SnapshotInfoPB generated by the normal export_snapshot (even ordering of tables/tablets).
  Result<std::pair<SnapshotInfoPB, std::unordered_set<TabletId>>> GenerateSnapshotInfoFromSchedule(
      const SnapshotScheduleId& snapshot_schedule_id, HybridTime read_time,
      CoarseTimePoint deadline) override;

  Result<google::protobuf::RepeatedPtrField<BackupRowEntryPB>> GetBackupEntriesAsOfTime(
      const TxnSnapshotId& snapshot_id, const NamespaceId& source_ns_id, HybridTime read_time);

  Status RestoreSnapshot(
      const RestoreSnapshotRequestPB* req, RestoreSnapshotResponsePB* resp, rpc::RpcContext* rpc,
@@ -2764,6 +2767,9 @@ class CatalogManager : public tserver::TabletPeerLookupIf,

  void ScheduleTabletSnapshotOp(const AsyncTabletSnapshotOpPtr& operation) override;

  Result<std::unique_ptr<rocksdb::DB>> RestoreSnapshotToTmpRocksDb(
      tablet::Tablet* tablet, const TxnSnapshotId& snapshot_id, HybridTime restore_at);

  Status RestoreSysCatalogCommon(
      SnapshotScheduleRestoration* restoration, tablet::Tablet* tablet,
      std::reference_wrapper<const ScopedRWOperation> pending_op,
