Return-Path: Delivered-To: apmail-hadoop-zookeeper-commits-archive@minotaur.apache.org Received: (qmail 62295 invoked from network); 20 Feb 2010 14:26:31 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 20 Feb 2010 14:26:31 -0000 Received: (qmail 79310 invoked by uid 500); 20 Feb 2010 14:26:31 -0000 Delivered-To: apmail-hadoop-zookeeper-commits-archive@hadoop.apache.org Received: (qmail 79269 invoked by uid 500); 20 Feb 2010 14:26:31 -0000 Mailing-List: contact zookeeper-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: zookeeper-dev@ Delivered-To: mailing list zookeeper-commits@hadoop.apache.org Received: (qmail 79258 invoked by uid 99); 20 Feb 2010 14:26:31 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 20 Feb 2010 14:26:31 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 20 Feb 2010 14:26:21 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id ECBD12388900; Sat, 20 Feb 2010 14:26:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r912119 - in /hadoop/zookeeper/trunk: ./ src/java/main/org/apache/zookeeper/server/quorum/ src/java/test/org/apache/zookeeper/test/ Date: Sat, 20 Feb 2010 14:26:00 -0000 To: zookeeper-commits@hadoop.apache.org From: fpj@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100220142600.ECBD12388900@eris.apache.org> Author: fpj Date: Sat Feb 20 14:26:00 2010 New Revision: 912119 URL: http://svn.apache.org/viewvc?rev=912119&view=rev Log: ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader election (henry via flavio) Added: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java Modified: hadoop/zookeeper/trunk/CHANGES.txt hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java Modified: hadoop/zookeeper/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=912119&r1=912118&r2=912119&view=diff ============================================================================== --- hadoop/zookeeper/trunk/CHANGES.txt (original) +++ hadoop/zookeeper/trunk/CHANGES.txt Sat Feb 20 14:26:00 2010 @@ -228,6 +228,9 @@ ZOOKEEPER-668. Close method in LedgerInputStream doesn't do anything (flavio via mahadev) + ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader + election (henry via flavio) + IMPROVEMENTS: ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to "socket reuse" and failure to close client (phunt via mahadev) Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java?rev=912119&r1=912118&r2=912119&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java (original) +++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java Sat Feb 20 14:26:00 2010 @@ -198,40 +198,50 @@ // down } } + ElectionResult result = countVotes(votes, heardFrom); - if (result.winner.id >= 0) { - self.setCurrentVote(result.vote); - // To do: this doesn't use a quorum verifier - if (result.winningCount > (self.getVotingView().size() / 2)) { - self.setCurrentVote(result.winner); - s.close(); - Vote current = self.getCurrentVote(); - LOG.info("Found leader: my type is: " + self.getPeerType()); - /** - * We want to make sure we implement the state machine - * correctly. If we are a PARTICIPANT, once a leader - * is elected we can move either to LEADING or - * FOLLOWING. However if we are an OBSERVER, it is an - * error to be elected as a Leader. - */ - if (self.getPeerType() == LearnerType.OBSERVER) { - if (current.id == self.getId()) { - // This should never happen! - LOG.error("OBSERVER elected as leader!"); - Thread.sleep(100); - } - else { - self.setPeerState(ServerState.OBSERVING); - Thread.sleep(100); + // ZOOKEEPER-569: + // If no votes are received for live peers, reset to voting + // for ourselves as otherwise we may hang on to a vote + // for a dead peer + if (votes.size() == 0) { + self.setCurrentVote(new Vote(self.getId(), + self.getLastLoggedZxid())); + } else { + if (result.winner.id >= 0) { + self.setCurrentVote(result.vote); + // To do: this doesn't use a quorum verifier + if (result.winningCount > (self.getVotingView().size() / 2)) { + self.setCurrentVote(result.winner); + s.close(); + Vote current = self.getCurrentVote(); + LOG.info("Found leader: my type is: " + self.getPeerType()); + /* + * We want to make sure we implement the state machine + * correctly. If we are a PARTICIPANT, once a leader + * is elected we can move either to LEADING or + * FOLLOWING. However if we are an OBSERVER, it is an + * error to be elected as a Leader. + */ + if (self.getPeerType() == LearnerType.OBSERVER) { + if (current.id == self.getId()) { + // This should never happen! + LOG.error("OBSERVER elected as leader!"); + Thread.sleep(100); + } + else { + self.setPeerState(ServerState.OBSERVING); + Thread.sleep(100); + return current; + } + } else { + self.setPeerState((current.id == self.getId()) + ? ServerState.LEADING: ServerState.FOLLOWING); + if (self.getPeerState() == ServerState.FOLLOWING) { + Thread.sleep(100); + } return current; } - } else { - self.setPeerState((current.id == self.getId()) - ? ServerState.LEADING: ServerState.FOLLOWING); - if (self.getPeerState() == ServerState.FOLLOWING) { - Thread.sleep(100); - } - return current; } } } Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java?rev=912119&r1=912118&r2=912119&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java (original) +++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java Sat Feb 20 14:26:00 2010 @@ -501,7 +501,7 @@ //TODO: use a factory rather than a switch switch (electionAlgorithm) { case 0: - // will create a new instance for each run of the protocol + le = new LeaderElection(this); break; case 1: le = new AuthFastLeaderElection(this); @@ -527,10 +527,9 @@ protected Election makeLEStrategy(){ LOG.debug("Initializing leader election protocol..."); - - if(electionAlg==null){ - return new LeaderElection(this); - } + if (getElectionType() == 0) { + electionAlg = new LeaderElection(this); + } return electionAlg; } Added: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java?rev=912119&view=auto ============================================================================== --- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java (added) +++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java Sat Feb 20 14:26:00 2010 @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.test; + +import java.io.File; +import java.io.IOException; +import java.net.DatagramPacket; +import java.net.DatagramSocket; +import java.net.InetSocketAddress; +import java.nio.ByteBuffer; +import java.util.HashMap; + +import junit.framework.TestCase; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.PortAssignment; +import org.apache.zookeeper.server.quorum.QuorumPeer; +import org.apache.zookeeper.server.quorum.Vote; +import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; +import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; +import org.junit.Test; + +/** + * Tests that a particular run of LeaderElection terminates correctly. + */ +public class LENonTerminateTest extends TestCase { + protected static final Logger LOG = Logger.getLogger(FLELostMessageTest.class); + + int count; + HashMap peers; + File tmpdir[]; + int port[]; + + @Override + public void setUp() throws Exception { + count = 3; + + peers = new HashMap(count); + tmpdir = new File[count]; + port = new int[count]; + + LOG.info("SetUp " + getName()); + } + + @Override + public void tearDown() throws Exception { + LOG.info("FINISHED " + getName()); + } + + + class LEThread extends Thread { + int i; + QuorumPeer peer; + + LEThread(QuorumPeer peer, int i) { + this.i = i; + this.peer = peer; + LOG.info("Constructor: " + getName()); + + } + + public void run(){ + try{ + Vote v = null; + peer.setPeerState(ServerState.LOOKING); + LOG.info("Going to call leader election: " + i); + v = peer.getElectionAlg().lookForLeader(); + + if (v == null){ + fail("Thread " + i + " got a null vote"); + } + + /* + * A real zookeeper would take care of setting the current vote. Here + * we do it manually. + */ + peer.setCurrentVote(v); + + LOG.info("Finished election: " + i + ", " + v.id); + } catch (Exception e) { + e.printStackTrace(); + } + LOG.info("Joining"); + } + } + + /** + * This tests ZK-569. + * With three peers A, B and C, the following could happen: + * 1. Round 1, A,B and C all vote for themselves + * 2. Round 2, C dies, A and B vote for C + * 3. Because C has died, votes for it are ignored, but A and B never + * reset their votes. Hence LE never terminates. ZK-569 fixes this by + * resetting votes to themselves if the set of votes for live peers is null. + */ + @Test + public void testNonTermination() throws Exception { + LOG.info("TestNonTermination: " + getName()+ ", " + count); + for(int i = 0; i < count; i++) { + int clientport = PortAssignment.unique(); + peers.put(Long.valueOf(i), + new QuorumServer(i, + new InetSocketAddress(clientport), + new InetSocketAddress(PortAssignment.unique()))); + tmpdir[i] = ClientBase.createTmpDir(); + port[i] = clientport; + } + + /* + * peer1 and peer2 are A and B in the above example. + */ + QuorumPeer peer1 = new QuorumPeer(peers, tmpdir[0], tmpdir[0], port[0], 0, 0, 2, 2, 2); + peer1.startLeaderElection(); + LEThread thread1 = new LEThread(peer1, 0); + thread1.start(); + + QuorumPeer peer2 = new QuorumPeer(peers, tmpdir[1], tmpdir[1], port[1], 0, 1, 2, 2, 2); + peer2.startLeaderElection(); + LEThread thread2 = new LEThread(peer2, 1); + thread2.start(); + + /* + * Start mock server. + */ + Thread thread3 = new Thread() { + public void run() { + try { + mockServer(); + } catch (Exception e) { + LOG.error(e); + fail("Exception when running mocked server " + e); + } + } + }; + + thread3.start(); + /* + * Occasionally seen false negatives with a 5s timeout. + */ + thread1.join(15000); + thread2.join(15000); + thread3.join(15000); + if (thread1.isAlive() || thread2.isAlive() || thread3.isAlive()) { + fail("Threads didn't join"); + } + } + + /** + * MockServer plays the role of peer C. Respond to two requests for votes + * with vote for self and then fail. + */ + void mockServer() throws InterruptedException, IOException { + byte b[] = new byte[36]; + ByteBuffer responseBuffer = ByteBuffer.wrap(b); + DatagramPacket packet = new DatagramPacket(b, b.length); + QuorumServer server = peers.get(Long.valueOf(2)); + DatagramSocket udpSocket = new DatagramSocket(server.addr.getPort()); + Vote current = new Vote(2, 1); + for (int i=0;i<2;++i) { + udpSocket.receive(packet); + responseBuffer.clear(); + responseBuffer.getInt(); // Skip the xid + responseBuffer.putLong(2); + + responseBuffer.putLong(current.id); + responseBuffer.putLong(current.zxid); + packet.setData(b); + udpSocket.send(packet); + } + } +} Modified: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java?rev=912119&r1=912118&r2=912119&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java (original) +++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java Sat Feb 20 14:26:00 2010 @@ -26,6 +26,7 @@ import junit.framework.TestCase; +import org.apache.log4j.Logger; import org.apache.zookeeper.PortAssignment; import org.apache.zookeeper.server.quorum.LeaderElection; import org.apache.zookeeper.server.quorum.QuorumPeer; @@ -33,6 +34,7 @@ import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; public class LETest extends TestCase { + private static final Logger LOG = Logger.getLogger(LETest.class); volatile Vote votes[]; volatile boolean leaderDies; volatile long leader = -1; @@ -57,7 +59,7 @@ if (leaderDies) { leaderDies = false; peer.stopLeaderElection(); - System.out.println("Leader " + i + " dying"); + LOG.info("Leader " + i + " dying"); leader = -2; } else { leader = i; @@ -77,7 +79,7 @@ Thread.sleep(rand.nextInt(1000)); peer.setCurrentVote(new Vote(peer.getId(), 0)); } - System.out.println("Thread " + i + " votes " + v); + LOG.info("Thread " + i + " votes " + v); } catch (InterruptedException e) { e.printStackTrace(); } @@ -129,5 +131,5 @@ } } } - } + } }