From commits-return-6960-apmail-kudu-commits-archive=kudu.apache.org@kudu.apache.org Fri Feb 8 00:58:38 2019 Return-Path: X-Original-To: apmail-kudu-commits-archive@minotaur.apache.org Delivered-To: apmail-kudu-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B574F10A6F for ; Fri, 8 Feb 2019 00:58:38 +0000 (UTC) Received: (qmail 1680 invoked by uid 500); 8 Feb 2019 00:58:38 -0000 Delivered-To: apmail-kudu-commits-archive@kudu.apache.org Received: (qmail 1647 invoked by uid 500); 8 Feb 2019 00:58:38 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 1637 invoked by uid 99); 8 Feb 2019 00:58:38 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 08 Feb 2019 00:58:38 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id AD434812CC; Fri, 8 Feb 2019 00:58:37 +0000 (UTC) Date: Fri, 08 Feb 2019 00:58:39 +0000 To: "commits@kudu.apache.org" Subject: [kudu] 02/02: [rebalancer] location-aware rebalancer (part 10/n) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: alexey@apache.org In-Reply-To: <154958751760.6202.1382580406790348275@gitbox.apache.org> References: <154958751760.6202.1382580406790348275@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: kudu X-Git-Refname: refs/heads/branch-1.9.x X-Git-Reftype: branch X-Git-Rev: cb5c6cd58bda8fc4efc01cca1c520be6b4fb6bf2 X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20190208005837.AD434812CC@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.9.x in repository https://gitbox.apache.org/repos/asf/kudu.git commit cb5c6cd58bda8fc4efc01cca1c520be6b4fb6bf2 Author: Alexey Serbin AuthorDate: Tue Nov 6 15:11:25 2018 -0800 [rebalancer] location-aware rebalancer (part 10/n) This patch makes the placement policy fixer and the cross-location rebalancing phases take into account already scheduled replica movements (a.k.a. moves is progress). Also, added more integration tests for the LA rebalancer tool. Change-Id: I27ada3c3dc50f45d1a95aaa51c36e2d40d8f3d49 Reviewed-on: http://gerrit.cloudera.org:8080/11892 Tested-by: Kudu Jenkins Reviewed-by: Will Berkeley (cherry picked from commit 4433fa5c787df13f95c595af15c470f89bd2957d) Reviewed-on: http://gerrit.cloudera.org:8080/12395 Reviewed-by: Andrew Wong Tested-by: Alexey Serbin --- src/kudu/tools/placement_policy_util.cc | 102 +++++++++++--- src/kudu/tools/placement_policy_util.h | 12 +- src/kudu/tools/rebalance_algo.cc | 2 +- src/kudu/tools/rebalance_algo.h | 4 +- src/kudu/tools/rebalancer.cc | 71 +++++++--- src/kudu/tools/rebalancer.h | 4 - src/kudu/tools/rebalancer_tool-test.cc | 228 +++++++++++++++++++++++++++++++- 7 files changed, 367 insertions(+), 56 deletions(-) diff --git a/src/kudu/tools/placement_policy_util.cc b/src/kudu/tools/placement_policy_util.cc index 48456f7..b92ff3b 100644 --- a/src/kudu/tools/placement_policy_util.cc +++ b/src/kudu/tools/placement_policy_util.cc @@ -209,8 +209,10 @@ Status FindBestReplicaToReplace( } // anonymous namespace -Status BuildTabletsPlacementInfo(const ClusterRawInfo& raw_info, - TabletsPlacementInfo* info) { +Status BuildTabletsPlacementInfo( + const ClusterRawInfo& raw_info, + const Rebalancer::MovesInProgress& moves_in_progress, + TabletsPlacementInfo* info) { DCHECK(info); unordered_map tables_info; @@ -236,11 +238,9 @@ Status BuildTabletsPlacementInfo(const ClusterRawInfo& raw_info, decltype(TabletsPlacementInfo::tablet_location_info) tablet_location_info; for (const auto& tablet_summary : raw_info.tablet_summaries) { const auto& tablet_id = tablet_summary.id; - if (tablet_summary.result != KsckCheckResult::HEALTHY) { - // TODO(aserbin): should this be reported as some transient condition - // to be taken into account? E.g., a tablet might be - // in process of copying data to a new replica to replace - // another replica which violates the placement policy. + // TODO(aserbin): process RF=1 tablets as necessary + if (tablet_summary.result != KsckCheckResult::HEALTHY && + tablet_summary.result != KsckCheckResult::RECOVERING) { VLOG(1) << Substitute("tablet $0: not considering replicas for movement " "since the tablet's status is '$1'", tablet_id, @@ -249,31 +249,95 @@ Status BuildTabletsPlacementInfo(const ClusterRawInfo& raw_info, } EmplaceOrDie(&tablet_to_table_id, tablet_id, tablet_summary.table_id); + // Check if it's one of the tablets which are currently being rebalanced. + // If so, interpret the move as successfully completed, updating the replica + // counts correspondingly. TabletInfo tablet_info; - for (const auto& replica_info : tablet_summary.replicas) { - TabletReplicaInfo info; - info.ts_uuid = replica_info.ts_uuid; - if (replica_info.is_leader) { - info.role = ReplicaRole::LEADER; - } else { - info.role = replica_info.is_voter ? ReplicaRole::FOLLOWER_VOTER - : ReplicaRole::FOLLOWER_NONVOTER; + const auto it_pending_moves = moves_in_progress.find(tablet_id); + if (it_pending_moves != moves_in_progress.end()) { + const auto& move_info = it_pending_moves->second; + // Check if the target replica is present in the config. + bool is_target_replica_present = false; + for (const auto& tr : tablet_summary.replicas) { + if (tr.ts_uuid == move_info.ts_uuid_to) { + is_target_replica_present = true; + break; + } + } + // If the target replica is present, it will be processed in the code + // below. Otherwise, it's necessary to pretend as if the target replica + // is in the config already: the idea is to count in the absent target + // replica as if the movement has successfully completed already. + // + // NOTE: an empty UUID for the target replica means the source replica + // is being kicked out from the config and the system will + // automatically add the replacement replica at the most appropriate + // location. + if (!is_target_replica_present && !move_info.ts_uuid_to.empty()) { + ++LookupOrEmplace(&replica_num_by_ts_id, move_info.ts_uuid_to, 0); + + // Populate ClusterLocationInfo::tablet_location_info. + auto& count_by_location = LookupOrEmplace(&tablet_location_info, + tablet_id, + unordered_map()); + const auto& location = FindOrDie(location_by_ts_id, move_info.ts_uuid_to); + ++LookupOrEmplace(&count_by_location, location, 0); + + { + // Faking an appearance of a new voter replica in the config: that's + // to reflect the completion of the scheduled replica movement. + TabletReplicaInfo info; + info.ts_uuid = move_info.ts_uuid_to; + info.role = ReplicaRole::FOLLOWER_VOTER; + tablet_info.replicas_info.emplace_back(std::move(info)); + } } + } + + for (const auto& replica_info : tablet_summary.replicas) { if (replica_info.is_leader && replica_info.consensus_state) { const auto& cstate = *replica_info.consensus_state; if (cstate.opid_index) { tablet_info.config_idx = *cstate.opid_index; } } - ++LookupOrEmplace(&replica_num_by_ts_id, replica_info.ts_uuid, 0); + bool do_count_replica = true; + if (it_pending_moves != moves_in_progress.end()) { + const auto& move_info = it_pending_moves->second; + if (move_info.ts_uuid_from == replica_info.ts_uuid) { + DCHECK(!replica_info.ts_uuid.empty()); + // Do not count the source replica for the scheduled/in-progress + // replica movement. The idea is to consider pending replica movements + // as if they have already completed successfully. + do_count_replica = false; + } + } + + auto& replica_count = + LookupOrEmplace(&replica_num_by_ts_id, replica_info.ts_uuid, 0); + if (do_count_replica) { + ++replica_count; + } // Populate ClusterLocationInfo::tablet_location_info. auto& count_by_location = LookupOrEmplace(&tablet_location_info, tablet_id, unordered_map()); - const auto& location = FindOrDie(location_by_ts_id, info.ts_uuid); - ++LookupOrEmplace(&count_by_location, location, 0); - tablet_info.replicas_info.emplace_back(std::move(info)); + const auto& location = FindOrDie(location_by_ts_id, replica_info.ts_uuid); + auto& count = LookupOrEmplace(&count_by_location, location, 0); + if (do_count_replica) { + ++count; + + TabletReplicaInfo info; + info.ts_uuid = replica_info.ts_uuid; + if (replica_info.is_leader) { + info.role = ReplicaRole::LEADER; + } else { + info.role = replica_info.is_voter ? ReplicaRole::FOLLOWER_VOTER + : ReplicaRole::FOLLOWER_NONVOTER; + } + tablet_info.replicas_info.emplace_back(std::move(info)); + } } EmplaceOrDie(&tablets_info, tablet_id, std::move(tablet_info)); } diff --git a/src/kudu/tools/placement_policy_util.h b/src/kudu/tools/placement_policy_util.h index 2938d17..7547b8e 100644 --- a/src/kudu/tools/placement_policy_util.h +++ b/src/kudu/tools/placement_policy_util.h @@ -76,9 +76,15 @@ struct TabletsPlacementInfo { std::unordered_map replica_num_by_ts_id; }; -// Convert ClusterRawInfo into TabletsPlacementInfo. -Status BuildTabletsPlacementInfo(const ClusterRawInfo& raw_info, - TabletsPlacementInfo* info); +// Convert ClusterRawInfo into TabletsPlacementInfo. The 'moves_in_progress' +// parameter contains information on the replica moves which have been scheduled +// by a caller and still in progress: those are considered as successfully +// completed and applied to the 'raw_info' when building TabletsPlacementInfo +// for the specified 'raw_info' input. +Status BuildTabletsPlacementInfo( + const ClusterRawInfo& raw_info, + const Rebalancer::MovesInProgress& moves_in_progress, + TabletsPlacementInfo* info); // Information on a violation of the basic placement policy constraint. // The basic constraint is: for any tablet, no location should contain diff --git a/src/kudu/tools/rebalance_algo.cc b/src/kudu/tools/rebalance_algo.cc index 8065f51..eaec6da 100644 --- a/src/kudu/tools/rebalance_algo.cc +++ b/src/kudu/tools/rebalance_algo.cc @@ -552,7 +552,7 @@ Status LocationBalancingAlgo::FindBestMove( // (i.e. number of table replicas / number of tablet servers), but it's // always beneficial to have less loaded servers in absolute terms. // - // If there are multiple candiate tablet servers with the same extremum load, + // If there are multiple candidate tablet servers with the same extremum load, // choose among them randomly. // // TODO(aserbin): implement fine-grained logic to select the best move among diff --git a/src/kudu/tools/rebalance_algo.h b/src/kudu/tools/rebalance_algo.h index 3b101c8..7ed1822 100644 --- a/src/kudu/tools/rebalance_algo.h +++ b/src/kudu/tools/rebalance_algo.h @@ -167,7 +167,7 @@ class TwoDimensionalGreedyAlgo : public RebalancingAlgo { // output into the 'server_uuids' parameter. Whether to consider most or least // loaded servers is controlled by 'extremum'. An empty 'intersection' on // return means no intersection was found for the mentioned sets of the - // extremally loaded servers: in that case optimizing the load by table would + // extremely loaded servers: in that case optimizing the load by table would // not affect the extreme load by server. // // None of the output parameters may be NULL. @@ -220,7 +220,7 @@ class TwoDimensionalGreedyAlgo : public RebalancingAlgo { // a table T. Assume we have locations L_0, ..., L_n, where // replica_num(T, L_0), ..., replica_num(T, L_n) are numbers of replicas // of T's tablets at corresponding locations. We want to make the following -// ratios to devicate as less as possible: +// ratios to deviate as less as possible: // // replica_num(T, L_0) / ts_num(L_0), ..., replica_num(T, L_n) / ts_num(L_n) // diff --git a/src/kudu/tools/rebalancer.cc b/src/kudu/tools/rebalancer.cc index 934d42e..a1548b4 100644 --- a/src/kudu/tools/rebalancer.cc +++ b/src/kudu/tools/rebalancer.cc @@ -460,6 +460,11 @@ Status Rebalancer::FilterCrossLocationTabletCandidates( "the source ($2) and the destination ($3) tablet servers", move.table_id, src_location, move.from, move.to)); } + if (dst_location.empty()) { + // The destination location is not specified, so no restrictions on the + // destination location to check for. + return Status::OK(); + } vector tablet_ids_filtered; for (auto& tablet_id : *tablet_ids) { @@ -476,7 +481,11 @@ Status Rebalancer::FilterCrossLocationTabletCandidates( const auto& table_id = FindOrDie(placement_info.tablet_to_table_id, tablet_id); const auto& table_info = FindOrDie(placement_info.tables_info, table_id); const auto rf = table_info.replication_factor; - if (location_replica_num + 1 >= consensus::MajoritySize(rf)) { + // In case of RF=2*N+1, losing (N + 1) replicas means losing the majority. + // In case of RF=2*N, losing at least N replicas means losing the majority. + const auto replica_num_threshold = rf % 2 ? consensus::MajoritySize(rf) + : rf / 2; + if (location_replica_num + 1 >= replica_num_threshold) { VLOG(1) << Substitute("destination location '$0' for candidate tablet $1 " "already contains $2 of $3 replicas", dst_location, tablet_id, location_replica_num, rf); @@ -628,12 +637,13 @@ Status Rebalancer::PrintLocationBalanceStats(const string& location, Status Rebalancer::PrintPolicyViolationInfo(const ClusterRawInfo& raw_info, ostream& out) const { TabletsPlacementInfo placement_info; - RETURN_NOT_OK(BuildTabletsPlacementInfo(raw_info, &placement_info)); + RETURN_NOT_OK(BuildTabletsPlacementInfo( + raw_info, MovesInProgress(), &placement_info)); vector ppvi; RETURN_NOT_OK(DetectPlacementPolicyViolations(placement_info, &ppvi)); out << "Placement policy violations:" << endl; if (ppvi.empty()) { - out << " none" << endl << endl;; + out << " none" << endl << endl; return Status::OK(); } @@ -741,6 +751,32 @@ Status Rebalancer::BuildClusterInfo(const ClusterRawInfo& raw_info, // If so, interpret the move as successfully completed, updating the // replica counts correspondingly. const auto it_pending_moves = moves_in_progress.find(tablet.id); + if (it_pending_moves != moves_in_progress.end()) { + const auto& move_info = it_pending_moves->second; + bool is_target_replica_present = false; + // Verify that the target replica is present in the config. + for (const auto& tr : tablet.replicas) { + if (tr.ts_uuid == move_info.ts_uuid_to) { + is_target_replica_present = true; + break; + } + } + // If the target replica is present, it will be processed in the code + // below. Otherwise, it's necessary to pretend as if the target replica + // is in the config already: the idea is to count in the absent target + // replica as if the movement has successfully completed already. + auto it = tserver_replicas_count.find(move_info.ts_uuid_to); + if (!is_target_replica_present && !move_info.ts_uuid_to.empty() && + it != tserver_replicas_count.end()) { + it->second++; + auto table_ins = table_replicas_info.emplace( + tablet.table_id, TableReplicasAtServer()); + TableReplicasAtServer& replicas_at_server = table_ins.first->second; + + auto replicas_ins = replicas_at_server.emplace(move_info.ts_uuid_to, 0); + replicas_ins.first->second++; + } + } for (const auto& ri : tablet.replicas) { // Increment total count of replicas at the tablet server. @@ -755,22 +791,12 @@ Status Rebalancer::BuildClusterInfo(const ClusterRawInfo& raw_info, continue; } bool do_count_replica = true; - if (it_pending_moves != moves_in_progress.end() && - tablet.result == KsckCheckResult::RECOVERING) { + if (it_pending_moves != moves_in_progress.end()) { const auto& move_info = it_pending_moves->second; - bool is_target_replica_present = false; - // Verify that the target replica is present in the config. - for (const auto& tr : tablet.replicas) { - if (tr.ts_uuid == move_info.ts_uuid_to) { - is_target_replica_present = true; - break; - } - } - if (move_info.ts_uuid_from == ri.ts_uuid && is_target_replica_present) { - // It seems both the source and the destination replicas of the - // scheduled replica movement operation are still in the config. - // That's a sign that the move operation hasn't yet completed. - // As explained above, let's interpret the move as successfully + if (move_info.ts_uuid_from == ri.ts_uuid) { + DCHECK(!ri.ts_uuid.empty()); + // The source replica of the scheduled replica movement operation + // are still in the config. Interpreting the move as successfully // completed, so the source replica should not be counted in. do_count_replica = false; } @@ -1203,7 +1229,7 @@ Status Rebalancer::AlgoBasedRunner::GetNextMovesImpl( TabletsPlacementInfo tpi; if (!loc) { - RETURN_NOT_OK(BuildTabletsPlacementInfo(raw_info, &tpi)); + RETURN_NOT_OK(BuildTabletsPlacementInfo(raw_info, scheduled_moves_, &tpi)); } // Build 'tablet_id' --> 'target tablet replication factor' map. @@ -1490,7 +1516,7 @@ bool Rebalancer::PolicyFixer::ScheduleNextMove(bool* has_errors, } CHECK(erased) << Substitute("T $0 P $1: move information not found", move_info.tablet_uuid, move_info.ts_uuid_from); - LOG(INFO) << Substitute("tablet $0: $1 -> ? move scheduled", + LOG(INFO) << Substitute("tablet $0: '$1' -> '?' move scheduled", move_info.tablet_uuid, move_info.ts_uuid_from); // Add information on scheduled move into the scheduled_moves_. // Only one replica of a tablet can be moved at a time. @@ -1565,10 +1591,11 @@ Status Rebalancer::PolicyFixer::GetNextMovesImpl( } } ClusterInfo ci; - RETURN_NOT_OK(rebalancer_->BuildClusterInfo(raw_info, MovesInProgress(), &ci)); + RETURN_NOT_OK(rebalancer_->BuildClusterInfo(raw_info, scheduled_moves_, &ci)); TabletsPlacementInfo placement_info; - RETURN_NOT_OK(BuildTabletsPlacementInfo(raw_info, &placement_info)); + RETURN_NOT_OK( + BuildTabletsPlacementInfo(raw_info, scheduled_moves_, &placement_info)); vector ppvi; RETURN_NOT_OK(DetectPlacementPolicyViolations(placement_info, &ppvi)); diff --git a/src/kudu/tools/rebalancer.h b/src/kudu/tools/rebalancer.h index d42cda5..9aa3241 100644 --- a/src/kudu/tools/rebalancer.h +++ b/src/kudu/tools/rebalancer.h @@ -318,10 +318,6 @@ class Rebalancer { // tserver UUID (i.e. the key) as the destination of the move operation'. std::unordered_map> dst_op_indices_; - // Information on scheduled replica movement operations; keys are - // tablet UUIDs, values are ReplicaMove structures. - MovesInProgress scheduled_moves_; - // Random device and generator for selecting among multiple choices, when // appropriate. std::random_device random_device_; diff --git a/src/kudu/tools/rebalancer_tool-test.cc b/src/kudu/tools/rebalancer_tool-test.cc index 9fcd997..7c89418 100644 --- a/src/kudu/tools/rebalancer_tool-test.cc +++ b/src/kudu/tools/rebalancer_tool-test.cc @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -83,6 +85,7 @@ using std::atomic; using std::back_inserter; using std::copy; using std::endl; +using std::ostream; using std::ostringstream; using std::string; using std::thread; @@ -387,7 +390,7 @@ TEST_P(RebalanceParamTest, Rebalance) { ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); ASSERT_STR_CONTAINS(out, "rebalancing is complete: cluster is balanced") << "stderr: " << err; - // The cluster was un-balanced, so many replicas should have been moved. + // The cluster was unbalanced, so many replicas should have been moved. ASSERT_STR_NOT_CONTAINS(out, "(moved 0 replicas)"); } @@ -395,6 +398,9 @@ TEST_P(RebalanceParamTest, Rebalance) { NO_FATALS(ClusterVerifier(cluster_.get()).CheckCluster()); } +// Working around limitations of older libstdc++. +static const unordered_set kEmptySet = unordered_set(); + // Common base for the rebalancer-related test below. class RebalancingTest : public tserver::TabletServerIntegrationTestBase { public: @@ -418,9 +424,6 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase { static const char* const kExitOnSignalStr; static const char* const kTableNamePattern; - // Working around limitations of older libstdc++. - static const unordered_set kEmptySet; - void Prepare(const vector& extra_tserver_flags = {}, const vector& extra_master_flags = {}, const LocationInfo& location_info = {}, @@ -520,7 +523,6 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase { }; const char* const RebalancingTest::kExitOnSignalStr = "kudu: process exited on signal"; const char* const RebalancingTest::kTableNamePattern = "rebalance_test_table_$0"; -const unordered_set RebalancingTest::kEmptySet = unordered_set(); typedef testing::WithParamInterface Kudu1097ParamTest; @@ -1427,5 +1429,221 @@ Placement policy violations: << ToolRunInfo(s, out, err); } +// Parameters for the location-aware rebalancing tests. +typedef struct { + int replication_factor; + int tables_num; + + // Information on the distribution of tablet servers among locations. + LocationInfo location_info; + + // Locations where tablet servers are shutdown during tablet creation + // to achieve non-balanced tablet replica distribution. + unordered_set excluded_locations; +} LaRebalancingParams; + +const LaRebalancingParams kLaRebalancingParams[] = { + // RF=3, 1 table, 1 location, 3 tablet servers. + { 3, 1, + { + { { "/A" }, 3 } + }, + kEmptySet }, + + // RF=3, 3 tables, 2 locations, 4 (3 + 1) tablet servers. + { 3, 3, + { + { { "/A" }, 3 }, + { { "/B" }, 1 }, + }, + { "/B" } + }, + + // RF=3, 3 tables, 2 locations, 5 (3 + 2) tablet servers. + { 3, 3, + { + { { "/A" }, 3 }, + { { "/B" }, 2 }, + }, + { "/B" } + }, + + // RF=3, 3 tables, 2 locations, 6 (3 + 3) tablet servers. + { 3, 3, + { + { { "/A" }, 3 }, + { { "/B" }, 3 }, + }, + { "/B" } + }, + + // RF=3, 3 tables, 3 locations, 6 (3 + 2 + 1) tablet servers. + { 3, 3, + { + { { "/A" }, 3 }, + { { "/B" }, 2 }, + { { "/C" }, 1 }, + }, + { "/B" } + }, + + // RF=5, 3 locations, 4 tables, 8 (3 + 2 + 3) tablet servers. + { 5, 4, + { + { { "/A" }, 3 }, + { { "/B" }, 2 }, + { { "/C" }, 3 }, + }, { "/C" } + }, + + // RF=7, 3 locations, 7 tables, 10 (3 + 4 + 3) tablet servers. + { 7, 7, + { + { { "/A" }, 3 }, + { { "/B" }, 4 }, + { { "/C" }, 3 }, + }, { "/C" } + }, + // RF=2, 4 locations, 4 (1 + 2 + 1) tablet servers. + { 2, 3, + { + { { "/A" }, 1 }, + { { "/B" }, 2 }, + { { "/C" }, 1 }, + }, { "/B" } + }, + + // RF=4, 4 locations, 6 (3 + 1 + 1 + 1) tablet servers. + { 4, 8, + { + { { "/A" }, 3 }, + { { "/B" }, 1 }, + { { "/C" }, 1 }, + { { "/D" }, 1 }, + }, { "/B", "/D" } + }, +}; + +// Custom name generator for LA rebalancing scenarios described by +// LaRebalancingParams. +static string LaRebalancingTestName( + const testing::TestParamInfo& info) { + ostringstream str; + const auto& p = info.param; + str << "idx" << info.index + << "_rf" << p.replication_factor + << "_t" << p.tables_num + << "_l"; + for (const auto& elem : p.location_info) { + str << "_" << elem.second; + } + return str.str(); +} + +// This is used by 'operator<<(ostream&, const LaRebalancingParams&)' below. +ostream& operator <<(ostream& out, const LocationInfo& info) { + out << "{ "; + for (const auto& elem : info) { + out << elem.first << ":" << elem.second << " "; + } + out << "}"; + return out; +} + +// This is useful to print the configuration of a failed param test. +ostream& operator <<(ostream& out, const LaRebalancingParams& info) { + out << "{ rep_factor: " << info.replication_factor; + out << ", num_tables: " << info.tables_num + << ", location_info: "; + out << info.location_info; + out << " }"; + return out; +} + +class LocationAwareRebalancingParamTest : + public RebalancingTest, + public ::testing::WithParamInterface { + public: + LocationAwareRebalancingParamTest() + : RebalancingTest(GetParam().tables_num, + GetParam().replication_factor, + std::accumulate(GetParam().location_info.begin(), + GetParam().location_info.end(), 0, + [](int sum, const LocationInfo::value_type& e) { + return sum + e.second; + })) { + } + + bool is_343_scheme() const override { + // These tests are for the 3-4-3 replica management scheme only. + return true; + } +}; +INSTANTIATE_TEST_CASE_P(, LocationAwareRebalancingParamTest, + ::testing::ValuesIn(kLaRebalancingParams), + LaRebalancingTestName); +TEST_P(LocationAwareRebalancingParamTest, Rebalance) { + if (!AllowSlowTests()) { + LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; + return; + } + const auto& param = GetParam(); + const auto& location_info = param.location_info; + const auto& excluded_locations = param.excluded_locations; + const vector& extra_master_flags = { + // In this test, the only users of the location assignment test script + // are the tablet servers: at this sub-class level it's hard to control + // when the test client connects to the external minicluster. + "--master_client_location_assignment_enabled=false", + + // This test can exercise scenarios with even replication factor for tables. + "--allow_unsafe_replication_factor", + }; + NO_FATALS(Prepare({}, extra_master_flags, location_info, excluded_locations)); + + const vector tool_args = { + "cluster", + "rebalance", + cluster_->master()->bound_rpc_addr().ToString(), + "--output_replica_distribution_details=true", + }; + + // The run of the location-aware rebalancing tool should report the cluster + // as balanced. + { + string out; + string err; + const Status s = RunKuduTool(tool_args, &out, &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + ASSERT_STR_CONTAINS(out, "rebalancing is complete: cluster is balanced") + << "stderr: " << err; + if (!param.excluded_locations.empty()) { + // In all location-aware cluster configurations where the replica were + // initially placed everywhere but some 'excluded locations', the cluster + // was unbalanced, so some replicas should have been moved. + ASSERT_STR_NOT_CONTAINS(out, "(moved 0 replicas)"); + } + if (param.replication_factor == 2 || param.location_info.size() == 2 || + (param.location_info.size() == 3 && param.replication_factor == 4)) { + // In case some cases it's impossible to satisfy the placement policy's + // constraints. + ASSERT_STR_CONTAINS(out, "Placement policy violations:\n"); + ASSERT_STR_CONTAINS(out, + "Number of non-complying tables | Number of non-complying tablets\n") + << "stderr: " << err; + // The "--output_replica_distribution_details" flag is set: the tool + // should output details on the violations of the placement policy. + ASSERT_STR_CONTAINS(out, "Placement policy violation details:\n") + << "stderr: " << err; + } else if (param.location_info.size() > 1) { + // In other cases all the violations of the placement policy should be + // corrected (in the case of a single location the rebalancer does not + // check against placement policy violations because that case is treated + // the same as a location-unaware, whole cluster rebalancing). + ASSERT_STR_CONTAINS(out, "Placement policy violations:\n none\n"); + } + } +} + } // namespace tools } // namespace kudu