hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject svn commit: r1239774 - in /hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common: ./ src/main/java/org/apache/hadoop/ha/ src/test/java/org/apache/hadoop/ha/
Date Thu, 02 Feb 2012 19:20:33 GMT
Author: eli
Date: Thu Feb  2 19:20:32 2012
New Revision: 1239774

URL: http://svn.apache.org/viewvc?rev=1239774&view=rev
Log:
HADOOP-7991. HA: the FailoverController should check the standby is ready before failing over.
Contributed by Eli Collins

Modified:
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java
    hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
Thu Feb  2 19:20:32 2012
@@ -39,3 +39,6 @@ HADOOP-7983. HA: failover should be able
 
 HADOOP-7938. HA: the FailoverController should optionally fence the
 active during failover. (eli)
+
+HADOOP-7991. HA: the FailoverController should check the standby is
+ready before failing over. (eli)

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java
Thu Feb  2 19:20:32 2012
@@ -46,12 +46,19 @@ public class FailoverController {
    * failover to, eg to prevent failing over to a service (eg due
    * to it being inaccessible, already active, not healthy, etc).
    *
+   * An option to ignore toSvc's claim that it is not ready to
+   * become active is provided in case performing the failover will
+   * allow it to become active, eg because it triggers a log roll
+   * so the standby can learn about new blocks and leave safemode.
+   *
    * @param toSvc service to make active
    * @param toSvcAddr addr of the service to make active
+   * @param forceActive ignore toSvc's report that it is not ready
    * @throws FailoverFailedException if we should avoid failover
    */
   private static void preFailoverChecks(HAServiceProtocol toSvc,
-                                        InetSocketAddress toSvcAddr)
+                                        InetSocketAddress toSvcAddr,
+                                        boolean forceActive)
       throws FailoverFailedException {
     HAServiceState toSvcState;
     try {
@@ -74,7 +81,17 @@ public class FailoverController {
       throw new FailoverFailedException(
           "Got an IO exception", e);
     }
-    // TODO(HA): ask toSvc if it's capable. Eg not in SM.
+    try {
+      if (!toSvc.readyToBecomeActive()) {
+        if (!forceActive) {
+          throw new FailoverFailedException(
+              toSvcAddr + " is not ready to become active");
+        }
+      }
+    } catch (IOException e) {
+      throw new FailoverFailedException(
+          "Got an IO exception", e);
+    }
   }
 
   /**
@@ -87,16 +104,19 @@ public class FailoverController {
    * @param toSvcAddr addr of the service to make active
    * @param fencer for fencing fromSvc
    * @param forceFence to fence fromSvc even if not strictly necessary
+   * @param forceActive try to make toSvc active even if it is not ready
    * @throws FailoverFailedException if the failover fails
    */
   public static void failover(HAServiceProtocol fromSvc,
                               InetSocketAddress fromSvcAddr,
                               HAServiceProtocol toSvc,
                               InetSocketAddress toSvcAddr,
-                              NodeFencer fencer, boolean forceFence)
+                              NodeFencer fencer,
+                              boolean forceFence,
+                              boolean forceActive)
       throws FailoverFailedException {
     Preconditions.checkArgument(fencer != null, "failover requires a fencer");
-    preFailoverChecks(toSvc, toSvcAddr);
+    preFailoverChecks(toSvc, toSvcAddr, forceActive);
 
     // Try to make fromSvc standby
     boolean tryFence = true;
@@ -145,7 +165,9 @@ public class FailoverController {
         try {
           // Unconditionally fence toSvc in case it is still trying to
           // become active, eg we timed out waiting for its response.
-          failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true);
+          // Unconditionally force fromSvc to become active since it
+          // was previously active when we initiated failover.
+          failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true, true);
         } catch (FailoverFailedException ffe) {
           msg += ". Failback to " + fromSvcAddr +
             " failed (" + ffe.getMessage() + ")";

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
Thu Feb  2 19:20:32 2012
@@ -47,7 +47,8 @@ import com.google.common.collect.Immutab
 
 public abstract class HAAdmin extends Configured implements Tool {
   
-  private static final String FORCEFENCE = "forcefence";
+  private static final String FORCEFENCE  = "forcefence";
+  private static final String FORCEACTIVE = "forceactive";
 
   private static Map<String, UsageInfo> USAGE =
     ImmutableMap.<String, UsageInfo>builder()
@@ -56,9 +57,11 @@ public abstract class HAAdmin extends Co
     .put("-transitionToStandby",
         new UsageInfo("<host:port>", "Transitions the daemon into Standby state"))
     .put("-failover",
-        new UsageInfo("[--"+FORCEFENCE+"] <host:port> <host:port>",
+        new UsageInfo("[--"+FORCEFENCE+"] [--"+FORCEACTIVE+"] <host:port> <host:port>",
             "Failover from the first daemon to the second.\n" +
-            "Unconditionally fence services if the "+FORCEFENCE+" option is used."))
+            "Unconditionally fence services if the "+FORCEFENCE+" option is used.\n" +
+            "Try to failover to the target service even if it is not ready if the " + 
+            FORCEACTIVE + " option is used."))
     .put("-getServiceState",
         new UsageInfo("<host:port>", "Returns the state of the daemon"))
     .put("-checkHealth",
@@ -124,12 +127,14 @@ public abstract class HAAdmin extends Co
       throws IOException, ServiceFailedException {
     Configuration conf = getConf();
     boolean forceFence = false;
+    boolean forceActive = false;
 
     Options failoverOpts = new Options();
     // "-failover" isn't really an option but we need to add
     // it to appease CommandLineParser
     failoverOpts.addOption("failover", false, "failover");
     failoverOpts.addOption(FORCEFENCE, false, "force fencing");
+    failoverOpts.addOption(FORCEACTIVE, false, "force failover");
 
     CommandLineParser parser = new GnuParser();
     CommandLine cmd;
@@ -137,6 +142,7 @@ public abstract class HAAdmin extends Co
     try {
       cmd = parser.parse(failoverOpts, argv);
       forceFence = cmd.hasOption(FORCEFENCE);
+      forceActive = cmd.hasOption(FORCEACTIVE);
     } catch (ParseException pe) {
       errOut.println("failover: incorrect arguments");
       printUsage(errOut, "-failover");
@@ -172,7 +178,7 @@ public abstract class HAAdmin extends Co
 
     try {
       FailoverController.failover(proto1, addr1, proto2, addr2,
-          fencer, forceFence); 
+          fencer, forceFence, forceActive); 
       out.println("Failover from "+args[0]+" to "+args[1]+" successful");
     } catch (FailoverFailedException ffe) {
       errOut.println("Failover failed: " + ffe.getLocalizedMessage());

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java
Thu Feb  2 19:20:32 2012
@@ -112,4 +112,15 @@ public interface HAServiceProtocol exten
    *           if other errors happen
    */
   public HAServiceState getServiceState() throws IOException;
+
+  /**
+   * Return true if the service is capable and ready to transition
+   * from the standby state to the active state.
+   * 
+   * @return true if the service is ready to become active, false otherwise.
+   * @throws IOException
+   *           if other errors happen
+   */
+  public boolean readyToBecomeActive() throws ServiceFailedException,
+                                              IOException;
 }

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java
Thu Feb  2 19:20:32 2012
@@ -79,6 +79,11 @@ public class TestFailoverController {
     public HAServiceState getServiceState() throws IOException {
       return state;
     }
+
+    @Override
+    public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
+      return true;
+    }
   }
   
   @Test
@@ -88,13 +93,13 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     AlwaysSucceedFencer.fenceCalled = 0;
-    FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+    FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
     assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled);
     assertEquals(HAServiceState.STANDBY, svc1.getServiceState());
     assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
 
     AlwaysSucceedFencer.fenceCalled = 0;
-    FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false);
+    FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false, false);
     assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled);
     assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());
     assertEquals(HAServiceState.STANDBY, svc2.getServiceState());
@@ -106,7 +111,7 @@ public class TestFailoverController {
     DummyService svc2 = new DummyService(HAServiceState.STANDBY);
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
-    FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+    FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
     assertEquals(HAServiceState.STANDBY, svc1.getServiceState());
     assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
   }
@@ -118,7 +123,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Can't failover to an already active service");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -129,6 +134,33 @@ public class TestFailoverController {
   }
 
   @Test
+  public void testFailoverToUnreadyService() throws Exception {
+    DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
+    DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
+      @Override
+      public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
+        return false;
+      }
+    };
+    NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
+
+    try {
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
+      fail("Can't failover to a service that's not ready");
+    } catch (FailoverFailedException ffe) {
+      // Expected
+    }
+
+    assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());
+    assertEquals(HAServiceState.STANDBY, svc2.getServiceState());
+
+    // Forcing it means we ignore readyToBecomeActive
+    FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, true);
+    assertEquals(HAServiceState.STANDBY, svc1.getServiceState());
+    assertEquals(HAServiceState.ACTIVE, svc2.getServiceState());
+  }
+
+  @Test
   public void testFailoverToUnhealthyServiceFailsAndFailsback() throws Exception {
     DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
     DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
@@ -140,7 +172,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failover to unhealthy service");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -162,7 +194,7 @@ public class TestFailoverController {
 
     AlwaysSucceedFencer.fenceCalled = 0;
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
     } catch (FailoverFailedException ffe) {
       fail("Faulty active prevented failover");
     }
@@ -187,7 +219,7 @@ public class TestFailoverController {
 
     AlwaysFailFencer.fenceCalled = 0;
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failed over even though fencing failed");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -207,7 +239,7 @@ public class TestFailoverController {
 
     AlwaysFailFencer.fenceCalled = 0;
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, true);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, true, false);
       fail("Failed over even though fencing requested and failed");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -238,7 +270,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
     } catch (FailoverFailedException ffe) {
       fail("Non-existant active prevented failover");
     }
@@ -254,7 +286,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failed over to a non-existant standby");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -275,7 +307,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failover to already active service");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -300,7 +332,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, true);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, true, false);
       fail("Failed over to service that won't transition to active");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -325,7 +357,7 @@ public class TestFailoverController {
     AlwaysSucceedFencer.fenceCalled = 0;
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failed over to service that won't transition to active");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -352,7 +384,7 @@ public class TestFailoverController {
     AlwaysFailFencer.fenceCalled = 0;
 
     try {
-      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false);
+      FailoverController.failover(svc1,  svc1Addr,  svc2,  svc2Addr, fencer, false, false);
       fail("Failed over to service that won't transition to active");
     } catch (FailoverFailedException ffe) {
       // Expected
@@ -383,7 +415,7 @@ public class TestFailoverController {
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
 
     try {
-      FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false);
+      FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false);
       fail("Failover to already active service");
     } catch (FailoverFailedException ffe) {
       // Expected

Modified: hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java?rev=1239774&r1=1239773&r2=1239774&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java
(original)
+++ hadoop/common/branches/HDFS-1623/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java
Thu Feb  2 19:20:32 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.ha.HAServicePro
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
+import static org.mockito.Mockito.when;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
@@ -44,8 +45,9 @@ public class TestHAAdmin {
   private HAServiceProtocol mockProtocol;
   
   @Before
-  public void setup() {
+  public void setup() throws IOException {
     mockProtocol = Mockito.mock(HAServiceProtocol.class);
+    when(mockProtocol.readyToBecomeActive()).thenReturn(true);
     tool = new HAAdmin() {
       @Override
       protected HAServiceProtocol getProtocol(String target) throws IOException {
@@ -131,6 +133,15 @@ public class TestHAAdmin {
   }
 
   @Test
+  public void testFailoverWithForceActive() throws Exception {
+    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Configuration conf = new Configuration();
+    conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
+    tool.setConf(conf);
+    assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forceactive"));
+  }
+
+  @Test
   public void testFailoverWithInvalidFenceArg() throws Exception {
     Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
     Configuration conf = new Configuration();



Mime
View raw message