hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject svn commit: r1329960 - in /hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common: CHANGES.HDFS-3042.txt src/main/java/org/apache/hadoop/ha/ZKFailoverController.java src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
Date Tue, 24 Apr 2012 19:34:51 GMT
Author: todd
Date: Tue Apr 24 19:34:51 2012
New Revision: 1329960

URL: http://svn.apache.org/viewvc?rev=1329960&view=rev
Log:
HADOOP-8306. ZKFC: improve error message when ZK is not running. Contributed by Todd Lipcon.

Modified:
    hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
    hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
    hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java

Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt (original)
+++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt Tue Apr 24 19:34:51 2012
@@ -19,3 +19,5 @@ HADOOP-8260. Replace ClientBaseWithFixes
 HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd)
 
 HADOOP-8247. Add a config to enable auto-HA, which disables manual FailoverController (todd)
+
+HADOOP-8306. ZKFC: improve error message when ZK is not running. (todd)

Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java (original)
+++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java Tue Apr 24 19:34:51 2012
@@ -34,6 +34,7 @@ import org.apache.hadoop.ha.HAZKUtil.ZKA
 import org.apache.hadoop.ha.HealthMonitor.State;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.util.Tool;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooDefs.Ids;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.zookeeper.data.ACL;
@@ -76,8 +77,11 @@ public abstract class ZKFailoverControll
   static final int ERR_CODE_NO_FENCER = 4;
   /** Automatic failover is not enabled */
   static final int ERR_CODE_AUTO_FAILOVER_NOT_ENABLED = 5;
+  /** Cannot connect to ZooKeeper */
+  static final int ERR_CODE_NO_ZK = 6;
   
   private Configuration conf;
+  private String zkQuorum;
 
   private HealthMonitor healthMonitor;
   private ActiveStandbyElector elector;
@@ -162,11 +166,23 @@ public abstract class ZKFailoverControll
       }
     }
     
-    if (!elector.parentZNodeExists()) {
-      LOG.fatal("Unable to start failover controller. " +
-          "Parent znode does not exist.\n" +
-          "Run with -formatZK flag to initialize ZooKeeper.");
-      return ERR_CODE_NO_PARENT_ZNODE;
+    try {
+      if (!elector.parentZNodeExists()) {
+        LOG.fatal("Unable to start failover controller. " +
+            "Parent znode does not exist.\n" +
+            "Run with -formatZK flag to initialize ZooKeeper.");
+        return ERR_CODE_NO_PARENT_ZNODE;
+      }
+    } catch (IOException ioe) {
+      if (ioe.getCause() instanceof KeeperException.ConnectionLossException) {
+        LOG.fatal("Unable to start failover controller. Unable to connect " +
+            "to ZooKeeper quorum at " + zkQuorum + ". Please check the " +
+            "configured value for " + ZK_QUORUM_KEY + " and ensure that " +
+            "ZooKeeper is running.");
+        return ERR_CODE_NO_ZK;
+      } else {
+        throw ioe;
+      }
     }
 
     try {
@@ -248,7 +264,7 @@ public abstract class ZKFailoverControll
   }
 
   private void initZK() throws HadoopIllegalArgumentException, IOException {
-    String zkQuorum = conf.get(ZK_QUORUM_KEY);
+    zkQuorum = conf.get(ZK_QUORUM_KEY);
     int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY,
         ZK_SESSION_TIMEOUT_DEFAULT);
     // Parse ACLs from configuration.

Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java (original)
+++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java Tue Apr 24 19:34:51 2012
@@ -95,6 +95,18 @@ public class TestZKFailoverController ex
     assertEquals(0, runFC(svc, "-formatZK", "-force"));
   }
   
+  /**
+   * Test that if ZooKeeper is not running, the correct error
+   * code is returned.
+   */
+  @Test(timeout=15000)
+  public void testNoZK() throws Exception {
+    stopServer();
+    DummyHAService svc = cluster.getService(1);
+    assertEquals(ZKFailoverController.ERR_CODE_NO_ZK,
+        runFC(svc));
+  }
+  
   @Test
   public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception {
     DummyHAService svc = cluster.getService(1);



Mime
View raw message