hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject git commit: HBASE-12376 HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)
Date Wed, 29 Oct 2014 21:57:50 GMT
Repository: hbase
Updated Branches:
  refs/heads/0.94 45eb18ba5 -> 23c71579c


HBASE-12376 HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/23c71579
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/23c71579
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/23c71579

Branch: refs/heads/0.94
Commit: 23c71579caf4884aaa6bea894e647c643afbc64c
Parents: 45eb18b
Author: stack <stack@apache.org>
Authored: Wed Oct 29 14:57:31 2014 -0700
Committer: stack <stack@apache.org>
Committed: Wed Oct 29 14:57:31 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/catalog/CatalogTracker.java    | 24 +++++++----
 .../apache/hadoop/hbase/client/HBaseAdmin.java  | 34 ++++++++++++++--
 .../apache/hadoop/hbase/client/TestAdmin.java   | 43 ++++++++++++++++++++
 3 files changed, 91 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/23c71579/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
index dcdd53c..912da55 100644
--- a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
+++ b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
@@ -26,6 +26,8 @@ import java.net.SocketTimeoutException;
 import java.net.UnknownHostException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -48,7 +50,7 @@ import org.apache.hadoop.ipc.RemoteException;
 /**
  * Tracks the availability of the catalog tables <code>-ROOT-</code> and
  * <code>.META.</code>.
- * 
+ *
  * This class is "read-only" in that the locations of the catalog tables cannot
  * be explicitly set.  Instead, ZooKeeper is used to learn of the availability
  * and location of <code>-ROOT-</code>.  <code>-ROOT-</code> is used
to learn of
@@ -65,7 +67,7 @@ public class CatalogTracker {
   // servers when they needed to know of root and meta movement but also by
   // client-side (inside in HTable) so rather than figure root and meta
   // locations on fault, the client would instead get notifications out of zk.
-  // 
+  //
   // But this original intent is frustrated by the fact that this class has to
   // read an hbase table, the -ROOT- table, to figure out the .META. region
   // location which means we depend on an HConnection.  HConnection will do
@@ -151,7 +153,7 @@ public class CatalogTracker {
    * @param abortable If fatal exception we'll call abort on this.  May be null.
    * If it is we'll use the Connection associated with the passed
    * {@link Configuration} as our Abortable.
-   * @throws IOException 
+   * @throws IOException
    */
   public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
       Abortable abortable)
@@ -205,7 +207,7 @@ public class CatalogTracker {
    * Determines current availability of catalog tables and ensures all further
    * transitions of either region are tracked.
    * @throws IOException
-   * @throws InterruptedException 
+   * @throws InterruptedException
    */
   public void start() throws IOException, InterruptedException {
     LOG.debug("Starting catalog tracker " + this);
@@ -220,6 +222,14 @@ public class CatalogTracker {
   }
 
   /**
+   * @return True if we are stopped. Call only after start else indeterminate answer.
+   */
+  @VisibleForTesting
+  public boolean isStopped() {
+    return this.stopped;
+  }
+
+  /**
    * Stop working.
    * Interrupts any ongoing waits.
    */
@@ -253,7 +263,7 @@ public class CatalogTracker {
    * not currently available.
    * @return {@link ServerName} for server hosting <code>-ROOT-</code> or null
    * if none available
-   * @throws InterruptedException 
+   * @throws InterruptedException
    */
   public ServerName getRootLocation() throws InterruptedException {
     return this.rootRegionTracker.getRootRegionLocation();
@@ -535,7 +545,7 @@ public class CatalogTracker {
       } else {
         throw ioe;
       }
-      
+
     }
     return protocol;
   }
@@ -595,7 +605,7 @@ public class CatalogTracker {
    * the internal call to {@link #waitForRootServerConnection(long)}.
    * @return True if the <code>-ROOT-</code> location is healthy.
    * @throws IOException
-   * @throws InterruptedException 
+   * @throws InterruptedException
    */
   public boolean verifyRootRegionLocation(final long timeout)
   throws InterruptedException, IOException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/23c71579/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
index 030e6c6..c30b676 100644
--- a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
+++ b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
@@ -33,6 +33,8 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Pattern;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -182,21 +184,47 @@ public class HBaseAdmin implements Abortable, Closeable {
    * @throws IOException
    * @see #cleanupCatalogTracker(CatalogTracker)
    */
-  private synchronized CatalogTracker getCatalogTracker()
+  @VisibleForTesting
+  synchronized CatalogTracker getCatalogTracker()
   throws ZooKeeperConnectionException, IOException {
+    boolean succeeded = false;
     CatalogTracker ct = null;
     try {
       ct = new CatalogTracker(this.conf);
-      ct.start();
+      startCatalogTracker(ct);
+      succeeded = true;
     } catch (InterruptedException e) {
       // Let it out as an IOE for now until we redo all so tolerate IEs
       Thread.currentThread().interrupt();
       throw new IOException("Interrupted", e);
+    } finally {
+      // If we did not succeed but created a catalogtracker, clean it up. CT has a ZK instance
+      // in it and we'll leak if we don't do the 'stop'.
+      if (!succeeded && ct != null) {
+        try {
+          ct.stop();
+        } catch (RuntimeException re) {
+          LOG.error("Failed to clean up HBase's internal catalog tracker after a failed initialization.
" +
+            "We may have leaked network connections to ZooKeeper; they won't be cleaned up
until " +
+            "the JVM exits. If you see a large number of stale connections to ZooKeeper this
is likely " +
+            "the cause. The following exception details will be needed for assistance from
the " +
+            "HBase community.", re);
+        }
+        ct = null;
+      }
     }
     return ct;
   }
 
-  private void cleanupCatalogTracker(final CatalogTracker ct) {
+  @VisibleForTesting
+  CatalogTracker startCatalogTracker(final CatalogTracker ct)
+  throws IOException, InterruptedException {
+    ct.start();
+    return ct;
+  }
+
+  @VisibleForTesting
+  void cleanupCatalogTracker(final CatalogTracker ct) {
     ct.stop();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/23c71579/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java b/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
index c445433..f249550 100644
--- a/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
+++ b/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import org.apache.zookeeper.KeeperException;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -33,6 +34,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -109,6 +111,47 @@ public class TestAdmin {
   public void tearDown() throws Exception {
   }
 
+  @Test (timeout=300000)
+  public void testFailedCatalogTrackerGetCleansUpProperly()
+  throws IOException {
+    // An HBaseAdmin that we can make fail when it goes to get catalogtracker.
+    final AtomicBoolean fail = new AtomicBoolean(false);
+    final AtomicReference<CatalogTracker> internalCt = new AtomicReference<CatalogTracker>();
+    HBaseAdmin doctoredAdmin = new HBaseAdmin(this.admin.getConfiguration()) {
+      @Override
+      protected CatalogTracker startCatalogTracker(CatalogTracker ct)
+      throws IOException, InterruptedException {
+        internalCt.set(ct);
+        super.startCatalogTracker(ct);
+        if (fail.get()) {
+          throw new IOException("Intentional test fail",
+            new KeeperException.ConnectionLossException());
+        }
+        return ct;
+      }
+    };
+    try {
+      CatalogTracker ct = doctoredAdmin.getCatalogTracker();
+      assertFalse(ct.isStopped());
+      doctoredAdmin.cleanupCatalogTracker(ct);
+      assertTrue(ct.isStopped());
+      // Now have mess with our doctored admin and make the start of catalog tracker 'fail'.
+      fail.set(true);
+      boolean expectedException = false;
+      try {
+        doctoredAdmin.getCatalogTracker();
+      } catch (IOException ioe) {
+        assertTrue(ioe.getCause() instanceof KeeperException.ConnectionLossException);
+        expectedException = true;
+      }
+      if (!expectedException) fail("Didn't get expected exception!");
+      // Assert that the internally made ct was properly shutdown.
+      assertTrue("Internal CatalogTracker not closed down", internalCt.get().isStopped());
+    } finally {
+      doctoredAdmin.close();
+    }
+  }
+
   @Test
   public void testSplitFlushCompactUnknownTable() throws InterruptedException {
     final String unknowntable = "fubar";


Mime
View raw message