From: mpercy@apache.org
To: commits@kudu.apache.org
Subject: kudu git commit: [quorum_util-test] scenario for 'flapping' replica health
Date: Thu, 14 Dec 2017 19:14:34 +0000 (UTC)

Repository: kudu
Updated Branches:
  refs/heads/master dbc84a2d7 -> 14bf71e73

[quorum_util-test] scenario for 'flapping' replica health

Added a simple scenario to verify that the catalog manager does not do
anything unexpected in the 3-4-3 replica management mode when a replica's
health status is flapping between HEALTHY and UNKNOWN.
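----------------------------------------------------------------------
[Editor's note: the scenario below drives two placement predicates,
ShouldEvictReplica() and ShouldAddReplica(), through a sequence of health
reports. For readers without the rest of quorum_util-test.cc, the helpers
appear to have roughly the following shapes; this is a sketch inferred
from their usage in the diff, not the authoritative declarations:]

// Single-character health symbols used by the test helpers (assumed
// mapping): '+' = HEALTHY, '-' = FAILED, '?' = UNKNOWN. V and N appear
// to abbreviate the voter and non-voter member types. The signatures
// below are inferred, hypothetical reconstructions.
void AddPeer(RaftConfigPB* config, const std::string& uuid,
             RaftPeerPB::MemberType member_type, char health_symbol,
             std::map<std::string, bool> attrs = {});  // e.g. {{"PROMOTE", true}}
void SetPeerHealth(RaftConfigPB* config, const std::string& uuid,
                   char health_symbol);
void PromotePeer(RaftConfigPB* config, const std::string& uuid);  // non-voter -> voter
void RemovePeer(RaftConfigPB* config, const std::string& uuid);
bool ShouldAddReplica(const RaftConfigPB& config, int replication_factor);
bool ShouldEvictReplica(const RaftConfigPB& config,
                        const std::string& leader_uuid,
                        int replication_factor,
                        std::string* uuid_to_evict = nullptr);
----------------------------------------------------------------------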
Change-Id: If7c1929a48a3c467ccd7523b4fbd5c23d4edc7f8
Reviewed-on: http://gerrit.cloudera.org:8080/8828
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy

Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/14bf71e7
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/14bf71e7
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/14bf71e7

Branch: refs/heads/master
Commit: 14bf71e73a3523f907162265b8bfa9c55c7bcc7c
Parents: dbc84a2
Author: Alexey Serbin
Authored: Tue Dec 12 22:31:31 2017 -0800
Committer: Mike Percy
Committed: Thu Dec 14 19:09:33 2017 +0000

----------------------------------------------------------------------
 src/kudu/consensus/quorum_util-test.cc | 93 +++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kudu/blob/14bf71e7/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 3d23bcb..5e6ac0e 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -1220,5 +1220,98 @@ TEST(QuorumUtilTest, NewlyPromotedReplicaCrashes) {
   EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
 }
 
+// A scenario to verify that the catalog manager does not do anything unexpected
+// in the 3-4-3 replica management mode when a replica's health is flapping
+// between HEALTHY and UNKNOWN (e.g., when the leader replica changes).
+TEST(QuorumUtilTest, ReplicaHealthFlapping) {
+  constexpr auto kReplicationFactor = 3;
+
+  // The initial tablet report after tablet replica A has started and
+  // become the leader.
+  RaftConfigPB config;
+  AddPeer(&config, "A", V, '+');
+  AddPeer(&config, "B", V, '?');
+  AddPeer(&config, "C", V, '?');
+  EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica B is reported as healthy.
+  SetPeerHealth(&config, "B", '+');
+  EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica C is reported as healthy.
+  SetPeerHealth(&config, "C", '+');
+  EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica B becomes the new leader.
+  SetPeerHealth(&config, "A", '?');
+  SetPeerHealth(&config, "B", '+');
+  SetPeerHealth(&config, "C", '?');
+  EXPECT_FALSE(ShouldEvictReplica(config, "B", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica A is reported as healthy; replica C fails.
+  SetPeerHealth(&config, "A", '+');
+  SetPeerHealth(&config, "B", '+');
+  SetPeerHealth(&config, "C", '-');
+  EXPECT_FALSE(ShouldEvictReplica(config, "B", kReplicationFactor));
+  EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor));
+
+  // A new non-voter replica has been added to replace failed replica C.
+  AddPeer(&config, "D", N, '?', {{"PROMOTE", true}});
+  EXPECT_FALSE(ShouldEvictReplica(config, "B", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica A becomes the new leader.
+  SetPeerHealth(&config, "A", '+');
+  SetPeerHealth(&config, "B", '?');
+  SetPeerHealth(&config, "C", '?');
+  SetPeerHealth(&config, "D", '?');
+  EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // The new leader has contacted on-line replicas.
+  SetPeerHealth(&config, "A", '+');
+  SetPeerHealth(&config, "B", '+');
+  SetPeerHealth(&config, "C", '?');
+  SetPeerHealth(&config, "D", '+');
+  EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica D catches up with the leader's WAL and gets promoted.
+  PromotePeer(&config, "D");
+  string to_evict;
+  ASSERT_TRUE(ShouldEvictReplica(config, "A", kReplicationFactor, &to_evict));
+  EXPECT_EQ("C", to_evict);
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  // Replica D becomes the new leader.
+  SetPeerHealth(&config, "A", '?');
+  SetPeerHealth(&config, "B", '?');
+  SetPeerHealth(&config, "C", '?');
+  SetPeerHealth(&config, "D", '+');
+  EXPECT_FALSE(ShouldEvictReplica(config, "D", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  SetPeerHealth(&config, "A", '+');
+  SetPeerHealth(&config, "B", '+');
+  SetPeerHealth(&config, "C", '?');
+  SetPeerHealth(&config, "D", '+');
+  ASSERT_TRUE(ShouldEvictReplica(config, "D", kReplicationFactor, &to_evict));
+  EXPECT_EQ("C", to_evict);
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  SetPeerHealth(&config, "C", '-');
+  ASSERT_TRUE(ShouldEvictReplica(config, "D", kReplicationFactor, &to_evict));
+  EXPECT_EQ("C", to_evict);
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+
+  RemovePeer(&config, "C");
+  EXPECT_FALSE(ShouldEvictReplica(config, "D", kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
+}
+
 } // namespace consensus
 } // namespace kudu
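----------------------------------------------------------------------
[Editor's note: below is a minimal, self-contained model of the two
decision predicates exercised above, for readers who want to experiment
with the flapping scenario outside the Kudu tree. The names mirror the
test's helpers, but the types and decision rules are simplified,
hypothetical stand-ins reconstructed from this test's assertions, not
Kudu's RaftConfigPB or the real quorum_util implementation. The key 3-4-3
property it captures: a replacement is added first (ShouldAddReplica turns
true while C is failed), and the failed voter is evicted only after the
replacement has been promoted, i.e. only while the config has more voters
than the replication factor.]

#include <iostream>
#include <string>
#include <vector>

enum class MemberType { VOTER, NON_VOTER };
enum class Health { HEALTHY, UNKNOWN, FAILED };  // '+', '?', '-'

struct Peer {
  std::string uuid;
  MemberType type;
  Health health;
  bool promote;  // models the PROMOTE attribute on a non-voter
};

using Config = std::vector<Peer>;

// A replica should be added when the count of voters not known to have
// failed, plus pending promotable non-voters, drops below the target
// replication factor (3-4-3: add a replacement before evicting).
bool ShouldAddReplica(const Config& config, int replication_factor) {
  int viable = 0;
  for (const Peer& p : config) {
    const bool counts = p.type == MemberType::VOTER ||
                        (p.type == MemberType::NON_VOTER && p.promote);
    if (counts && p.health != Health::FAILED) ++viable;
  }
  return viable < replication_factor;
}

// A replica should be evicted only once the config has more voters than
// the replication factor; the victim is the non-leader voter in the worst
// health state, and only if a majority of the remaining voters is known
// to be healthy.
bool ShouldEvictReplica(const Config& config, const std::string& leader_uuid,
                        int replication_factor,
                        std::string* to_evict = nullptr) {
  std::vector<const Peer*> voters;
  for (const Peer& p : config) {
    if (p.type == MemberType::VOTER) voters.push_back(&p);
  }
  if (static_cast<int>(voters.size()) <= replication_factor) return false;

  // Pick the non-leader voter in the worst health state
  // (FAILED worst, then UNKNOWN, then HEALTHY).
  const Peer* victim = nullptr;
  for (const Peer* p : voters) {
    if (p->uuid == leader_uuid) continue;
    if (victim == nullptr || p->health > victim->health) victim = p;
  }
  if (victim == nullptr) return false;

  // Safety check: a majority of the post-eviction voters must be healthy.
  const int remaining = static_cast<int>(voters.size()) - 1;
  int healthy = 0;
  for (const Peer* p : voters) {
    if (p != victim && p->health == Health::HEALTHY) ++healthy;
  }
  if (healthy < remaining / 2 + 1) return false;

  if (to_evict != nullptr) *to_evict = victim->uuid;
  return true;
}

int main() {
  // Mirror one step of the scenario: four voters after D's promotion, with
  // C still unresponsive ('?'); the model recommends evicting C and sees
  // no need to add another replica.
  const Config config = {
      {"A", MemberType::VOTER, Health::HEALTHY, false},
      {"B", MemberType::VOTER, Health::HEALTHY, false},
      {"C", MemberType::VOTER, Health::UNKNOWN, false},
      {"D", MemberType::VOTER, Health::HEALTHY, false},
  };
  std::string to_evict;
  std::cout << std::boolalpha
            << ShouldEvictReplica(config, "A", 3, &to_evict)  // true
            << " evict=" << to_evict << '\n'                  // evict=C
            << ShouldAddReplica(config, 3) << '\n';           // false
  return 0;
}
----------------------------------------------------------------------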