Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 414AD200D5B for ; Wed, 29 Nov 2017 02:15:55 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 3FF01160C15; Wed, 29 Nov 2017 01:15:55 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 6040D160BE7 for ; Wed, 29 Nov 2017 02:15:54 +0100 (CET) Received: (qmail 24710 invoked by uid 500); 29 Nov 2017 01:15:53 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 24701 invoked by uid 99); 29 Nov 2017 01:15:53 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Nov 2017 01:15:53 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 829CFDFA0F; Wed, 29 Nov 2017 01:15:52 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mpercy@apache.org To: commits@kudu.apache.org Message-Id: <405a8d44a42b4d7db58ea1d4cf78c764@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: kudu git commit: KUDU-1097: 'gone-and-back tablet server' test scenario Date: Wed, 29 Nov 2017 01:15:52 +0000 (UTC) archived-at: Wed, 29 Nov 2017 01:15:55 -0000 Repository: kudu Updated Branches: refs/heads/master e3cf62fbb -> 27da8323d KUDU-1097: 'gone-and-back tablet server' test scenario Added a new test scenario for the new 3-4-3 re-replication scheme. 
The scenario addresses the situation when a tablet server has not been running for some time (e.g., a bit over the time interval specified by the 'follower_unavailable_considered_failed_sec' flag), and then it comes back before the newly added non-voter replicas are promoted. As a result, the original voter replicas from the tablet server should stay, but the newly added non-voter replicas should be evicted. Change-Id: I35eb6a0c7de5bfef962b5e96857c3f9c85a1a7b0 Reviewed-on: http://gerrit.cloudera.org:8080/8664 Tested-by: Alexey Serbin Reviewed-by: Mike Percy Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/27da8323 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/27da8323 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/27da8323 Branch: refs/heads/master Commit: 27da8323d37bcf9f2075763d51e3adfbb75c73f4 Parents: e3cf62f Author: Alexey Serbin Authored: Sun Nov 26 15:59:28 2017 -0800 Committer: Mike Percy Committed: Wed Nov 29 01:15:27 2017 +0000 ---------------------------------------------------------------------- .../raft_consensus_nonvoter-itest.cc | 125 ++++++++++++++++++- 1 file changed, 122 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/27da8323/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc index e8882c3..4853e5f 100644 --- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc +++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc @@ -29,6 +29,7 @@ #include "kudu/client/client.h" #include "kudu/client/shared_ptr.h" #include "kudu/consensus/metadata.pb.h" +#include "kudu/consensus/quorum_util.h" #include "kudu/gutil/gscoped_ptr.h" #include 
"kudu/gutil/map-util.h" #include "kudu/gutil/strings/substitute.h" @@ -60,6 +61,7 @@ using kudu::cluster::ExternalDaemon; using kudu::cluster::ExternalMaster; using kudu::cluster::ExternalTabletServer; using kudu::consensus::RaftPeerPB; +using kudu::consensus::IsRaftConfigMember; using kudu::itest::AddServer; using kudu::itest::GetInt64Metric; using kudu::itest::LeaderStepDown; @@ -112,6 +114,10 @@ class RaftConsensusNonVoterITest : public RaftConsensusITestBase { const string& tablet_id, const TServerDetails* replica, const MonoDelta& timeout); + + // Get a tablet server with at least one replica of the test tablet identified + // by the 'tablet_id_' member. + ExternalTabletServer* GetServerWithReplica() const; }; Status RaftConsensusNonVoterITest::GetTabletCopySourceSessionsCount( @@ -191,6 +197,17 @@ Status RaftConsensusNonVoterITest::ChangeReplicaMembership( tablet_id, replica->uuid())); } +ExternalTabletServer* RaftConsensusNonVoterITest::GetServerWithReplica() const { + ExternalTabletServer* ts = nullptr; + for (const auto& e : tablet_replicas_) { + if (e.first == tablet_id_) { + ts = cluster_->tablet_server_by_uuid(e.second->uuid()); + break; + } + } + return ts; +} + // Ensure that adding a NON_VOTER replica is properly handled by the system: // // * Updating Raft configuration for tablet by adding a NON_VOTER replica @@ -1022,9 +1039,9 @@ TEST_F(RaftConsensusNonVoterITest, CatalogManagerAddsNonVoter) { ASSERT_EQ(kReplicasNum + 1, tablet_servers_.size()); ASSERT_EQ(kReplicasNum, tablet_replicas_.size()); - ExternalTabletServer* ts0 = cluster_->tablet_server(0); - ASSERT_NE(nullptr, ts0); - ts0->Shutdown(); + ExternalTabletServer* ts_with_replica = GetServerWithReplica(); + ASSERT_NE(nullptr, ts_with_replica); + ts_with_replica->Shutdown(); // Wait for a new non-voter replica added by the catalog manager to // replace the failed one. 
@@ -1040,5 +1057,107 @@ TEST_F(RaftConsensusNonVoterITest, CatalogManagerAddsNonVoter) { NO_FATALS(cluster_->AssertNoCrashes()); } +// Verify the behavior of the catalog manager for the gone-and-back tablet +// server in --raft_prepare_replacement_before_eviction=true case. This scenario +// addresses the situation when a tablet server hosting tablet replicas has not +// been running for some time (e.g., a bit over the time interval specified by +// the 'follower_unavailable_considered_failed_sec' flag), and then it comes +// back before the newly added non-voter replicas are promoted. As a result, the +// original voter replicas from the tablet server should stay, but the newly +// added non-voter replicas should be evicted. +TEST_F(RaftConsensusNonVoterITest, TabletServerIsGoneAndBack) { + if (!AllowSlowTests()) { + LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; + return; + } + + const auto kReplicasNum = 3; + const auto kReplicaUnavailableSec = 5; + const auto kTimeoutSec = 60; + const auto kTimeout = MonoDelta::FromSeconds(kTimeoutSec); + FLAGS_num_replicas = kReplicasNum; + // Need one extra tserver after the tserver with one of the replicas stopped. + // Otherwise, the catalog manager would not be able to spawn new non-voter + // replacement replicas elsewhere. + FLAGS_num_tablet_servers = kReplicasNum + 1; + const vector kMasterFlags = { + // The scenario runs with the 3-4-3 replica management scheme. + "--raft_prepare_replacement_before_eviction=true", + }; + const vector kTserverFlags = { + // The scenario runs with the 3-4-3 replica management scheme. + "--raft_prepare_replacement_before_eviction=true", + Substitute("--follower_unavailable_considered_failed_sec=$0", + kReplicaUnavailableSec), + // Slow down tablet copy to avoid new non-voter replicas catching up with + // the leader replicas, otherwise they might be promoted to voters before + // the replicas from the 'failed' tablet server are back. 
+ Substitute("--tablet_copy_download_file_inject_latency_ms=$0", + MonoDelta::FromSeconds(3 * kTimeoutSec).ToMilliseconds()), + // Don't wait for the RPC timeout for too long. + Substitute("--consensus_rpc_timeout_ms=$0", 1000 * kReplicaUnavailableSec), + }; + + NO_FATALS(BuildAndStart(kTserverFlags, kMasterFlags)); + ASSERT_EQ(kReplicasNum + 1, tablet_servers_.size()); + ASSERT_EQ(kReplicasNum, tablet_replicas_.size()); + + // Create a test table and insert some data into the table, + // so the special flag --tablet_copy_download_file_inject_latency_ms + // could take effect while tablet copy happens down the road. + TestWorkload workload(cluster_.get()); + workload.set_table_name(kTableId); + workload.Setup(); + workload.Start(); + while (workload.rows_inserted() < 10) { + SleepFor(MonoDelta::FromMilliseconds(10)); + } + workload.StopAndJoin(); + + ExternalTabletServer* ts_with_replica = GetServerWithReplica(); + ASSERT_NE(nullptr, ts_with_replica); + ts_with_replica->Shutdown(); + + // The leader replica marks the non-responsive replica as failed after + // FLAGS_follower_unavailable_considered_failed_sec time interval. The + // catalog manager should spot that and add a new non-voter replica as a + // replacement. + bool has_leader = false; + TabletLocationsPB tablet_locations; + ASSERT_OK(WaitForReplicasReportedToMaster(cluster_->master_proxy(), + kReplicasNum + 1, + tablet_id_, + kTimeout, + WAIT_FOR_LEADER, + &has_leader, + &tablet_locations)); + + // Restart the tablet server with the replica which has been marked as failed. + ASSERT_OK(ts_with_replica->Restart()); + + // Since the new non-voter replica is still not ready for promotion because + // the tablet copy is in progress, and all the original voter replicas are in + // good health, the catalog manager should evict an excess non-voter replica. 
+ ASSERT_OK(WaitForReplicasReportedToMaster(cluster_->master_proxy(), + kReplicasNum, + tablet_id_, + kTimeout, + WAIT_FOR_LEADER, + &has_leader, + &tablet_locations)); + // Make sure the replica from the gone-and-back server is part of the config. + consensus::ConsensusStatePB cstate; + ASSERT_EVENTUALLY([&] { + TServerDetails* leader = nullptr; + ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &leader)); + // The reason for the outside ASSERT_EVENTUALLY is that the leader might + // have changed in between these two calls. + ASSERT_OK(GetConsensusState(leader, tablet_id_, kTimeout, &cstate)); + }); + ASSERT_TRUE(IsRaftConfigMember(ts_with_replica->uuid(), cstate.committed_config())); + + NO_FATALS(cluster_->AssertNoCrashes()); +} + } // namespace tserver } // namespace kudu