diff --git a/src/yb/master/master-path-handlers.cc b/src/yb/master/master-path-handlers.cc index 56fc14673d5..ff6ff4a1f68 100644 --- a/src/yb/master/master-path-handlers.cc +++ b/src/yb/master/master-path-handlers.cc @@ -166,6 +166,14 @@ class AutoFieldsetScope { std::stringstream& output_; }; + +std::optional ToUnsignedOrNullopt(int64_t val) { + if (val == std::numeric_limits::max()) { + return std::nullopt; + } else { + return val; + } +} } // namespace using consensus::RaftPeerPB; @@ -191,6 +199,7 @@ void MasterPathHandlers::TabletCounts::operator+=(const TabletCounts& other) { user_tablet_followers += other.user_tablet_followers; system_tablet_leaders += other.system_tablet_leaders; system_tablet_followers += other.system_tablet_followers; + hidden_tablet_peers += other.hidden_tablet_peers; } MasterPathHandlers::ZoneTabletCounts::ZoneTabletCounts( @@ -400,6 +409,40 @@ bool TabletServerComparator( } // anonymous namespace +MasterPathHandlers::UniverseTabletCounts MasterPathHandlers::CalculateUniverseTabletCounts( + const TabletCountMap& tablet_count_map, const std::vector>& descs, + const BlacklistSet& blacklist_set, + int hide_dead_node_threshold_mins) { + UniverseTabletCounts counts; + for (const auto& desc : descs) { + if (ShouldHideTserverNodeFromDisplay(desc.get(), hide_dead_node_threshold_mins)) { + continue; + } + const auto& placement_uuid = desc->placement_uuid(); + PlacementClusterTabletCounts& placement_cluster_counts = + counts.per_placement_cluster_counts[placement_uuid]; + if (auto* tablet_count = FindOrNull(tablet_count_map, desc->permanent_uuid())) { + placement_cluster_counts.counts += *tablet_count; + } + if (desc->IsBlacklisted(blacklist_set)) { + placement_cluster_counts.blacklisted_node_count++; + } else if (desc->IsLive()) { + placement_cluster_counts.live_node_count++; + } else { + placement_cluster_counts.dead_node_count++; + } + placement_cluster_counts.active_tablet_peer_count += desc->num_live_replicas(); + } + + auto limits = 
tserver::GetTabletReplicaPerResourceLimits(); + for (auto& [placement_uuid, cluster_counts] : counts.per_placement_cluster_counts) { + auto cluster_info = ComputeAggregatedClusterInfo(descs, placement_uuid); + cluster_counts.tablet_replica_limit = + ToUnsignedOrNullopt(ComputeTabletReplicaLimit(cluster_info, limits)); + } + return counts; +} + void MasterPathHandlers::TServerDisplay(const std::string& current_uuid, std::vector>* descs, TabletCountMap* tablet_map, @@ -425,7 +468,7 @@ void MasterPathHandlers::TServerDisplay(const std::string& current_uuid, // Comparator orders by cloud, region, zone and uuid fields. std::sort(local_descs.begin(), local_descs.end(), &TabletServerComparator); - for (auto desc : local_descs) { + for (const auto& desc : local_descs) { if (desc->placement_uuid() == current_uuid) { if (ShouldHideTserverNodeFromDisplay(desc.get(), hide_dead_node_threshold_mins)) { continue; @@ -522,6 +565,57 @@ void MasterPathHandlers::TServerDisplay(const std::string& current_uuid, *output << "\n"; } +void MasterPathHandlers::DisplayUniverseSummary( + const TabletCountMap& tablet_map, const std::vector>& all_descs, + const std::string& live_id, + int hide_dead_node_threshold_mins, + std::stringstream* output) { + auto blacklist_result = master_->catalog_manager()->BlacklistSetFromPB(); + BlacklistSet blacklist = blacklist_result.ok() ? *blacklist_result : BlacklistSet(); + auto universe_counts = CalculateUniverseTabletCounts( + tablet_map, all_descs, blacklist, hide_dead_node_threshold_mins); + + // auto include_placement_uuids = universe_counts.per_placement_cluster_counts.size() > 1; + // auto placement_uuid_header = include_placement_uuids ? "Cluster UUID\n" : ""; + *output << "

Universe Summary

\n" + << "\n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n"; + for (const auto& [placement_uuid, cluster_counts] : + universe_counts.per_placement_cluster_counts) { + auto placement_uuid_entry = Format( + "$0 $1", placement_uuid == live_id ? "Primary Cluster" : "Read Replica", placement_uuid); + auto limit_entry = cluster_counts.tablet_replica_limit.has_value() + ? Format("$0", *cluster_counts.tablet_replica_limit) + : "N/A"; + auto user_total = + cluster_counts.counts.user_tablet_followers + cluster_counts.counts.user_tablet_leaders; + auto system_total = + cluster_counts.counts.system_tablet_followers + cluster_counts.counts.system_tablet_leaders; + *output << "\n" + // << placement_uuid_entry + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n"; + } + *output << "
Cluster UUIDTotal Live TServersTotal Blacklisted TServersTotal Dead TServersUser Tablet-PeersSystem Tablet-PeersHidden Tablet-PeersActive Tablet-PeersTablet Peer Limit
" << placement_uuid_entry << "" << cluster_counts.live_node_count << "" << cluster_counts.blacklisted_node_count << "" << cluster_counts.dead_node_count << "" << user_total << "" << system_total << "" << cluster_counts.counts.hidden_tablet_peers << "" << cluster_counts.active_tablet_peer_count << "" << limit_entry << "
\n"; +} + void MasterPathHandlers::DisplayTabletZonesTable( const ZoneTabletCounts::CloudTree& cloud_tree, std::stringstream* output @@ -680,6 +774,9 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& req, } *output << std::setprecision(output_precision_); + if (viewType == TServersViewType::kTServersDefaultView) { + DisplayUniverseSummary(tablet_map, descs, live_id, hide_dead_node_threshold_override, output); + } *output << "

Tablet Servers

\n"; if (!live_id.empty()) { @@ -3372,22 +3469,24 @@ void MasterPathHandlers::CalculateTabletMap(TabletCountMap* tablet_map) { TabletInfos tablets = table->GetTablets(IncludeInactive::kTrue); bool is_user_table = master_->catalog_manager()->IsUserCreatedTable(*table); - for (const auto& tablet : tablets) { auto replication_locations = tablet->GetReplicaLocations(); - for (const auto& replica : *replication_locations) { + auto& counts = (*tablet_map)[replica.first]; + if (tablet->LockForRead()->is_hidden()) { + counts.hidden_tablet_peers++; + } if (is_user_table || table->IsColocationParentTable()) { if (replica.second.role == PeerRole::LEADER) { - (*tablet_map)[replica.first].user_tablet_leaders++; + counts.user_tablet_leaders++; } else { - (*tablet_map)[replica.first].user_tablet_followers++; + counts.user_tablet_followers++; } } else { if (replica.second.role == PeerRole::LEADER) { - (*tablet_map)[replica.first].system_tablet_leaders++; + counts.system_tablet_leaders++; } else { - (*tablet_map)[replica.first].system_tablet_followers++; + counts.system_tablet_followers++; } } } diff --git a/src/yb/master/master-path-handlers.h b/src/yb/master/master-path-handlers.h index 6b10fab28ff..e75c865b124 100644 --- a/src/yb/master/master-path-handlers.h +++ b/src/yb/master/master-path-handlers.h @@ -127,6 +127,9 @@ class MasterPathHandlers { uint32_t user_tablet_followers = 0; uint32_t system_tablet_leaders = 0; uint32_t system_tablet_followers = 0; + // Hidden tablets are not broken down by leader vs. follower or user vs. system. They just count + // the number of tablet peers which are hidden. 
+ uint32_t hidden_tablet_peers = 0; void operator+=(const TabletCounts& other); }; @@ -148,9 +151,31 @@ class MasterPathHandlers { typedef std::map RegionTree; typedef std::map CloudTree; }; + + struct PlacementClusterTabletCounts { + TabletCounts counts; + uint32_t live_node_count = 0; + uint32_t blacklisted_node_count = 0; + uint32_t dead_node_count = 0; + uint32_t active_tablet_peer_count = 0; + // Tablet replica limits are computed from flag values. If these flag values are unset the + // universe will have no limit. This is represented with std::nullopt. + std::optional tablet_replica_limit = 0; + }; + + struct UniverseTabletCounts { + // Keys are placement_uuids. + std::unordered_map per_placement_cluster_counts; + }; + // Map of tserver UUID -> TabletCounts typedef std::unordered_map TabletCountMap; + UniverseTabletCounts CalculateUniverseTabletCounts( + const TabletCountMap& tablet_count_map, + const std::vector>& descs, const BlacklistSet& blacklist_set, + int hide_dead_node_threshold_mins); + struct ReplicaInfo { PeerRole role; TabletId tablet_id; @@ -183,6 +208,12 @@ class MasterPathHandlers { const int hide_dead_node_threshold_override, TServersViewType viewType); + void DisplayUniverseSummary( + const TabletCountMap& tablet_map, const std::vector>& all_descs, + const std::string& live_id, + int hide_dead_node_threshold_mins, + std::stringstream* output); + // Outputs a ZoneTabletCounts::CloudTree as an html table with a heading. static void DisplayTabletZonesTable( const ZoneTabletCounts::CloudTree& counts, diff --git a/src/yb/master/tablet_creation_limits.cc b/src/yb/master/tablet_creation_limits.cc index 7b596d4e28e..05b4c5963d2 100644 --- a/src/yb/master/tablet_creation_limits.cc +++ b/src/yb/master/tablet_creation_limits.cc @@ -74,6 +74,11 @@ AggregatedClusterInfo ComputeAggregatedClusterInfo( }; } +// TODO(zdrudi): This function is passed a filtered version of TSDescriptorVector - blacklisted and +// non-live tservers are removed. 
But tablet replicas hosted on blacklisted tservers aren't going +// to be deleted so they should be counted towards the total number of live tablet replicas. Alter +// this function to take the complete, unfiltered TSDescriptorVector and put logic directly into +// ComputeAggregatedClusterInfo to do the right thing with blacklisted and non-live tservers. Status CanCreateTabletReplicas( int num_tablets, const ReplicationInfoPB& replication_info, const TSDescriptorVector& ts_descs) {