hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject hbase git commit: HBASE-14498 Master stuck in infinite loop when all Zookeeper servers are unreachable (Pankaj Kumar)
Date Thu, 12 Nov 2015 15:21:27 GMT
Repository: hbase
Updated Branches:
  refs/heads/master 1f62a4872 -> b677f2e65


HBASE-14498 Master stuck in infinite loop when all Zookeeper servers are unreachable (Pankaj Kumar)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b677f2e6
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b677f2e6
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b677f2e6

Branch: refs/heads/master
Commit: b677f2e65d07194702fc181c8fd777804fa967ae
Parents: 1f62a48
Author: tedyu <yuzhihong@gmail.com>
Authored: Thu Nov 12 07:21:19 2015 -0800
Committer: tedyu <yuzhihong@gmail.com>
Committed: Thu Nov 12 07:21:19 2015 -0800

----------------------------------------------------------------------
 .../hbase/zookeeper/ZooKeeperWatcher.java       | 42 ++++++++++++++++-
 .../hbase/zookeeper/TestZooKeeperWatcher.java   | 49 +++++++++++++++++++-
 2 files changed, 88 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/b677f2e6/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
index f7a2175..8f80255 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.security.Superusers;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
@@ -74,7 +76,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
   private RecoverableZooKeeper recoverableZooKeeper;
 
   // abortable in case of zk failure
-  protected Abortable abortable;
+  protected final Abortable abortable;
   // Used if abortable is null
   private boolean aborted = false;
 
@@ -85,6 +87,10 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
   // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
   // negotiation to complete
   public CountDownLatch saslLatch = new CountDownLatch(1);
+  
+  // Connection timeout on disconnect event
+  private long connWaitTimeOut;
+  private AtomicBoolean isConnected = new AtomicBoolean(false);
 
   // node names
 
@@ -167,6 +173,9 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
     this.identifier = identifier + "0x0";
     this.abortable = abortable;
     setNodeNames(conf);
+    // On Disconnected event a thread will wait for sometime (2/3 of zookeeper.session.timeout),
+    // it will abort the process if no SyncConnected event reported by the time.
+    connWaitTimeOut = this.conf.getLong("zookeeper.session.timeout", 90000) * 2 / 3;
     this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, this, identifier);
     if (canCreateBaseZNode) {
       createBaseZNodes();
@@ -596,6 +605,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
   private void connectionEvent(WatchedEvent event) {
     switch(event.getState()) {
       case SyncConnected:
+        isConnected.set(true);
         // Now, this callback can be invoked before the this.zookeeper is set.
         // Wait a little while.
         long finished = System.currentTimeMillis() +
@@ -625,7 +635,35 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
 
       // Abort the server if Disconnected or Expired
       case Disconnected:
-        LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
+        LOG.debug("Received Disconnected from ZooKeeper.");
+        isConnected.set(false);
+        
+        Thread t = new Thread() { 
+          public void run() {
+            long startTime = EnvironmentEdgeManager.currentTime();
+            while (EnvironmentEdgeManager.currentTime() - startTime < connWaitTimeOut) {
+              if (isConnected.get()) {
+                LOG.debug("Client got reconnected to zookeeper.");
+                return;
+              }
+              try {
+                Thread.sleep(100);
+              } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                break;
+              }
+            }
+            
+          if (!isConnected.get() && abortable != null) {
+            String msg =
+                prefix("Couldn't connect to ZooKeeper after waiting " + connWaitTimeOut
+                    + " ms, aborting");
+            abortable.abort(msg, new KeeperException.ConnectionLossException());
+          }
+          };
+        };
+        t.setDaemon(true);
+        t.start();
         break;
 
       case Expired:

http://git-wip-us.apache.org/repos/asf/hbase/blob/b677f2e6/hbase-client/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperWatcher.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperWatcher.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperWatcher.java
index 10a3816..8071b03 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperWatcher.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperWatcher.java
@@ -22,15 +22,23 @@ import static org.junit.Assert.*;
 
 import java.io.IOException;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 @Category({SmallTests.class})
 public class TestZooKeeperWatcher {
-
+  private final static Log LOG = LogFactory.getLog(TestZooKeeperWatcher.class);
+  
   @Test
   public void testIsClientReadable() throws ZooKeeperConnectionException, IOException {
     ZooKeeperWatcher watcher = new ZooKeeperWatcher(HBaseConfiguration.create(),
@@ -56,5 +64,44 @@ public class TestZooKeeperWatcher {
 
     watcher.close();
   }
+  
+  @Test
+  public void testConnectionEvent() throws ZooKeeperConnectionException, IOException {
+    long zkSessionTimeout = 15000l;
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("zookeeper.session.timeout", "15000");
+
+    Abortable abortable = new Abortable() {
+      boolean aborted = false;
+
+      @Override
+      public void abort(String why, Throwable e) {
+        aborted = true;
+        LOG.error(why, e);
+      }
+
+      @Override
+      public boolean isAborted() {
+        return aborted;
+      }
+    };
+    ZooKeeperWatcher watcher = new ZooKeeperWatcher(conf, "testConnectionEvent", abortable, false);
 
+    WatchedEvent event =
+        new WatchedEvent(Watcher.Event.EventType.None, Watcher.Event.KeeperState.Disconnected, null);
+
+    long startTime = EnvironmentEdgeManager.currentTime();
+    while (!abortable.isAborted()
+        && (EnvironmentEdgeManager.currentTime() - startTime < zkSessionTimeout)) {
+      watcher.process(event);
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
+    }
+
+    assertTrue(abortable.isAborted());
+    watcher.close();
+  }
 }


Mime
View raw message