Author: breed
Date: Wed Sep 24 14:27:50 2008
New Revision: 698743
URL: http://svn.apache.org/viewvc?rev=698743&view=rev
Log:
ZOOKEEPER-117 threading issues in Leader election
Modified:
hadoop/zookeeper/trunk/CHANGES.txt
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=698743&r1=698742&r2=698743&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Wed Sep 24 14:27:50 2008
@@ -66,3 +66,6 @@
again. (breed via mahadev)
ZOOKEEPER-137. client watcher objects can lose events (Patrick Hunt via breed)
+
+ ZOOKEEPER-117. threading issues in Leader election (Flavio Junqueira and Patrick
+ Hunt via breed)
Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=698743&r1=698742&r2=698743&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Wed Sep 24 14:27:50 2008
@@ -24,6 +24,7 @@
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketAddress;
+import java.net.SocketException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -59,6 +60,9 @@
QuorumPeer self;
+ // the follower acceptor thread
+ FollowerCnxAcceptor cnxAcceptor;
+
// list of all the followers
public HashSet<FollowerHandler> followers = new HashSet<FollowerHandler>();
@@ -194,6 +198,42 @@
ConcurrentLinkedQueue<Proposal> toBeApplied = new ConcurrentLinkedQueue<Proposal>();
Proposal newLeaderProposal = new Proposal();
+
+ class FollowerCnxAcceptor extends Thread{ // thread that accepts sockets on ss and wraps each in a FollowerHandler
+ private volatile boolean stop = false; // set by halt(); checked by the accept loop in run()
+ // Accept loop: runs until stop is set or an exception escapes the inner try.
+ @Override
+ public void run() {
+ try {
+ while (!stop) {
+ try{
+ Socket s = ss.accept(); // blocks until a connection arrives or accept() throws
+ s.setSoTimeout(self.tickTime * self.syncLimit); // read timeout = tickTime * syncLimit
+ s.setTcpNoDelay(true);
+ new FollowerHandler(s, Leader.this); // result is not kept; NOTE(review): presumably the handler starts/registers itself - confirm
+ } catch (SocketException e) {
+ if (stop) {
+ LOG.info("exception while shutting down acceptor: "
+ + e);
+ // Expected path during shutdown (halt() was already called):
+ // When Leader.shutdown() calls ss.close(),
+ // the blocked call to accept throws an exception.
+ // stop is already true here, so the assignment below is a no-op.
+ stop = true;
+ } else {
+ throw e; // not shutting down: handled by the outer catch, which ends the loop
+ }
+ }
+ }
+ } catch (Exception e) {
+ LOG.warn("Exception while accepting follower", e);
+ }
+ }
+ // Requests termination of the accept loop; only sets the flag, it does not itself unblock accept().
+ public void halt() {
+ stop = true;
+ }
+ }
/**
* This method is main function that is called to lead
@@ -217,21 +257,12 @@
+ newLeaderProposal.packet.getZxid());
}
outstandingProposals.add(newLeaderProposal);
- new Thread() {
- @Override
- public void run() {
- try {
- while (true) {
- Socket s = ss.accept();
- s.setSoTimeout(self.tickTime * self.syncLimit);
- s.setTcpNoDelay(true);
- new FollowerHandler(s, Leader.this);
- }
- } catch (Exception e) {
- LOG.warn("Exception while accepting follower", e);
- }
- }
- }.start();
+
+ // Start thread that waits for connection requests from
+ // new followers.
+ cnxAcceptor = new FollowerCnxAcceptor();
+ cnxAcceptor.start();
+
// We have to get at least a majority of servers in sync with
// us. We do this by waiting for the NEWLEADER packet to get
// acknowledged
@@ -256,9 +287,12 @@
self.cnxnFactory.setZooKeeperServer(zk);
}
// Everything is a go, simply start counting the ticks
- synchronized (this) {
- notifyAll();
- }
+ // WARNING: I couldn't find any wait statement on a synchronized
+ // block that would be notified by this notifyAll() call, so
+ // I commented it out
+ //synchronized (this) {
+ // notifyAll();
+ //}
// We ping twice a tick, so we only update the tick every other
// iteration
boolean tickSkip = true;
@@ -299,9 +333,12 @@
if (isShutdown) {
return;
}
+
+ LOG.info("Shutdown called",
+ new Exception("shutdown Leader! reason: " + reason));
- LOG.error("FIXMSG",new Exception("shutdown Leader! reason: "
- + reason));
+ cnxAcceptor.halt();
+
// NIO should not accept conenctions
self.cnxnFactory.setZooKeeperServer(null);
// clear all the connections
|