From dev-return-67948-archive-asf-public=cust-asf.ponee.io@zookeeper.apache.org Fri Mar 2 22:58:07 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id AE130180671 for ; Fri, 2 Mar 2018 22:58:06 +0100 (CET) Received: (qmail 30169 invoked by uid 500); 2 Mar 2018 21:58:03 -0000 Mailing-List: contact dev-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@zookeeper.apache.org Delivered-To: mailing list dev@zookeeper.apache.org Received: (qmail 29085 invoked by uid 99); 2 Mar 2018 21:58:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 02 Mar 2018 21:58:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 19B61F4DD5; Fri, 2 Mar 2018 21:58:01 +0000 (UTC) From: afine To: dev@zookeeper.apache.org Reply-To: dev@zookeeper.apache.org References: In-Reply-To: Subject: [GitHub] zookeeper pull request #476: ZOOKEEPER-2988: NPE triggered if server receive... Content-Type: text/plain Message-Id: <20180302215802.19B61F4DD5@git1-us-west.apache.org> Date: Fri, 2 Mar 2018 21:58:01 +0000 (UTC) Github user afine commented on a diff in the pull request: https://github.com/apache/zookeeper/pull/476#discussion_r171972608 --- Diff: src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java --- @@ -1012,4 +1012,113 @@ public void testFailedTxnAsPartOfQuorumLoss() throws Exception { Assert.assertNull("server " + i + " should not have /zk" + leader, servers.zk[i].exists("/zk" + leader, false)); } } + + /** + * Verify that a node without the leader in its view will not attempt to connect to the leader. 
+ */ + @Test + public void testLeaderOutOfView() throws Exception { + ClientBase.setupTestEnv(); + + Layout layout = new PatternLayout("%d{ISO8601} [,yid:%X{myid}] - %5p [%t:%C{1}@%L] - %m%n"); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + WriterAppender appender = new WriterAppender(layout, os); + appender.setThreshold(Level.DEBUG); + Logger qlogger = Logger.getLogger("org.apache.zookeeper.server.quorum"); + qlogger.addAppender(appender); + + try { + final int CLIENT_PORT_QP1 = PortAssignment.unique(); + final int CLIENT_PORT_QP2 = PortAssignment.unique(); + final int CLIENT_PORT_QP3 = PortAssignment.unique(); + + String quorumCfgIncomplete = getUniquePortCfgForId(1) + "\n" + getUniquePortCfgForId(2); + String quorumCfgComplete = quorumCfgIncomplete + "\n" + getUniquePortCfgForId(3); + + // Node 1 is started without the leader (3) in its config view + MainThread q1 = new MainThread(1, CLIENT_PORT_QP1, quorumCfgIncomplete); + MainThread q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgComplete); + MainThread q3 = new MainThread(3, CLIENT_PORT_QP3, quorumCfgComplete); + + // Node 1 must be started first, before quorum is formed, to trigger the attempted invalid connection to 3 + q1.start(); + QuorumPeer quorumPeer1 = waitForQuorumPeer(q1, CONNECTION_TIMEOUT); + Assert.assertTrue(quorumPeer1.getPeerState() == QuorumPeer.ServerState.LOOKING); + + // Node 3 started second to avoid 1 and 2 forming a quorum before 3 starts up + q3.start(); + QuorumPeer quorumPeer3 = waitForQuorumPeer(q3, CONNECTION_TIMEOUT); + Assert.assertTrue(quorumPeer3.getPeerState() == QuorumPeer.ServerState.LOOKING); + + // Node 2 started last, kicks off leader election + q2.start(); + + // Nodes 2 and 3 now form quorum and fully start. 
1 attempts to vote for 3, fails, returns to LOOKING state + Assert.assertTrue("waiting for server 2 to start", + ClientBase.waitForServerUp("127.0.0.1:" + CLIENT_PORT_QP2, CONNECTION_TIMEOUT)); + Assert.assertTrue("waiting for server 3 to start", + ClientBase.waitForServerUp("127.0.0.1:" + CLIENT_PORT_QP3, CONNECTION_TIMEOUT)); + + Assert.assertTrue(q1.getQuorumPeer().getPeerState() == QuorumPeer.ServerState.LOOKING); + Assert.assertTrue(q2.getQuorumPeer().getPeerState() == QuorumPeer.ServerState.FOLLOWING); + Assert.assertTrue(q3.getQuorumPeer().getPeerState() == QuorumPeer.ServerState.LEADING); + + q1.shutdown(); --- End diff -- Is there a way we can use the existing tearDown code, so that we never leave any servers running even if an assertion fails? ---