hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmhs...@apache.org
Subject git commit: HBASE-11405 Multiple invocations of hbck in parallel disables balancer permanently (Bharath Vissapragada)
Date Wed, 17 Sep 2014 19:53:47 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1 88e7da321 -> 3b30a1042


HBASE-11405 Multiple invocations of hbck in parallel disables balancer permanently (Bharath
Vissapragada)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3b30a104
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3b30a104
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3b30a104

Branch: refs/heads/branch-1
Commit: 3b30a104247e65b6443bc94545235d789cf13185
Parents: 88e7da3
Author: Jonathan M Hsieh <jmhsieh@apache.org>
Authored: Wed Sep 17 12:51:53 2014 -0700
Committer: Jonathan M Hsieh <jmhsieh@apache.org>
Committed: Wed Sep 17 12:51:53 2014 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/HBaseFsck.java | 87 +++++++++++++++++++-
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 48 +++++++++++
 2 files changed, 134 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3b30a104/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 017410a..6d3f2e1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
+import java.net.InetAddress;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -45,18 +46,22 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.ClusterStatus;
@@ -109,7 +114,10 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hbase.security.AccessDeniedException;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;
@@ -178,6 +186,8 @@ public class HBaseFsck extends Configured {
   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
   private static final int DEFAULT_MAX_MERGE = 5;
   private static final String TO_BE_LOADED = "to_be_loaded";
+  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
+
 
   /**********************
    * Internal resources
@@ -192,6 +202,11 @@ public class HBaseFsck extends Configured {
   private long startMillis = System.currentTimeMillis();
   private HFileCorruptionChecker hfcc;
   private int retcode = 0;
+  private static Path HBCK_LOCK_PATH;
+  private FSDataOutputStream hbckOutFd;
+  // This flag prevents the hbck lock file from being cleaned up twice, once by
+  // the ShutdownHook and once by the main code.
+  private static AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
 
   /***********
    * Options
@@ -301,10 +316,78 @@ public class HBaseFsck extends Configured {
   }
 
   /**
+   * This method maintains a lock using a file. If it is already being created we return null
+   *
+   * @return FSDataOutputStream object corresponding to the newly opened lock file
+   * @throws IOException
+   */
+  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
+    try {
+      FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
+      FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
+          HConstants.DATA_FILE_UMASK_KEY);
+      Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
+      fs.mkdirs(tmpDir);
+      HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
+      final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
+      out.writeBytes(InetAddress.getLocalHost().toString());
+      out.flush();
+      return out;
+    } catch(RemoteException e) {
+      if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
+        return null;
+      } else {
+        throw e;
+      }
+    }
+  }
+
+  private void unlockHbck() throws IOException {
+    if(hbckLockCleanup.compareAndSet(false, true)){
+      IOUtils.closeStream(hbckOutFd);
+      try{
+        FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
+        //Reset the hbckLockCleanup to false so that subsequent calls using the same
+        // Hbck object succeed. This is added for tests, which keep re-using the same
+        // objects
+        hbckLockCleanup.set(false);
+      } catch(IOException ioe) {
+        LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
+        LOG.debug(ioe);
+      }
+    }
+  }
+
+  /**
    * To repair region consistency, one must call connect() in order to repair
    * online state.
    */
   public void connect() throws IOException {
+
+    // Check if another instance of hbck is running
+    hbckOutFd = checkAndMarkRunningHbck();
+    if (hbckOutFd == null) {
+      setRetCode(-1);
+      LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure"
+
+		      " no other instance is running, delete the lock file " +
+		      HBCK_LOCK_PATH + " and rerun the tool]");
+      throw new IOException("Duplicate hbck - Abort");
+    }
+
+    // Add a shutdown hook so that, in case the user tries to
+    // kill hbck with a ctrl-c, the lock is cleaned up and
+    // is available for further calls
+    Runtime.getRuntime().addShutdownHook(new Thread() {
+      public void run() {
+        try{
+          unlockHbck();
+        } catch(Exception e){
+          LOG.debug("Error while removing hbck lock " + e.getMessage());
+        }
+      }
+    });
+    LOG.debug("Launching hbck");
+
     connection = HConnectionManager.createConnection(getConf());
     admin = new HBaseAdmin(connection);
     meta = new HTable(TableName.META_TABLE_NAME, connection);
@@ -499,6 +582,9 @@ public class HBaseFsck extends Configured {
 
     checkAndFixTableLocks();
 
+    // Remove the hbck lock
+    unlockHbck();
+
     // Print table summary
     printTableSummary(tablesInfo);
     return errors.summarize();
@@ -3842,7 +3928,6 @@ public class HBaseFsck extends Configured {
     Path hbasedir = FSUtils.getRootDir(conf);
     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
     FSUtils.setFsDefault(conf, new Path(defaultFs));
-
     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
     System.exit(ret);
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/3b30a104/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index b464de0..fe068c9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -36,8 +36,13 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.concurrent.Callable;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -527,6 +532,49 @@ public class TestHBaseFsck {
   }
 
   /**
+   * This test makes sure that parallel instances of Hbck are disabled.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testParallelHbck() throws Exception {
+    final ExecutorService service;
+    final Future<HBaseFsck> hbck1,hbck2;
+
+    class RunHbck implements Callable<HBaseFsck>{
+      boolean fail = true;
+      public HBaseFsck call(){
+        try{
+          return doFsck(conf, false);
+        } catch(Exception e){
+          if (e.getMessage().contains("Duplicate hbck")) {
+            fail = false;
+          }
+        }
+        // If we reach here, then an exception was caught
+        if (fail) fail();
+        return null;
+      }
+    }
+    service = Executors.newFixedThreadPool(2);
+    hbck1 = service.submit(new RunHbck());
+    hbck2 = service.submit(new RunHbck());
+    service.shutdown();
+    //wait till hbck calls finish
+    service.awaitTermination(Integer.MAX_VALUE, TimeUnit.SECONDS);
+    HBaseFsck h1 = hbck1.get();
+    HBaseFsck h2 = hbck2.get();
+    // Make sure only one of the calls was successful
+    assert(h1 == null || h2 == null);
+    if (h1 != null) {
+      assert(h1.getRetCode() >= 0);
+    }
+    if (h2 != null) {
+      assert(h2.getRetCode() >= 0);
+    }
+  }
+
+  /**
    * This create and fixes a bad table with regions that have a duplicate
    * start key
    */


Mime
View raw message