hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r1531150 - /hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
Date Thu, 10 Oct 2013 23:40:06 GMT
Author: stack
Date: Thu Oct 10 23:40:06 2013
New Revision: 1531150

URL: http://svn.apache.org/r1531150
Log:
HBASE-9743 RollingBatchRestartRsAction aborts if timeout

Modified:
    hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java

Modified: hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java?rev=1531150&r1=1531149&r2=1531150&view=diff
==============================================================================
--- hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java	(original)
+++ hbase/trunk/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java	Thu Oct 10 23:40:06 2013
@@ -18,11 +18,15 @@
 
 package org.apache.hadoop.hbase.chaos.actions;
 
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Queue;
 
 import org.apache.commons.lang.math.RandomUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
 
@@ -31,6 +35,8 @@ import org.apache.hadoop.hbase.chaos.mon
  * server, or starts one, sleeping randomly (0-sleepTime) in between steps.
  */
 public class RollingBatchRestartRsAction extends BatchRestartRsAction {
+  private static Log LOG = LogFactory.getLog(RollingBatchRestartRsAction.class);
+
   public RollingBatchRestartRsAction(long sleepTime, float ratio) {
     super(sleepTime, ratio);
   }
@@ -57,14 +63,64 @@ public class RollingBatchRestartRsAction
 
       if (action) {
         ServerName server = serversToBeKilled.remove();
-        killRs(server);
+        try {
+          killRs(server);
+        } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
+          // We've seen test runs where we time out but the kill went through (HBASE-9743).
+          // So add the server to deadServers even on exception, so that start still gets called.
+          LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
+        }
         deadServers.add(server);
       } else {
-        ServerName server = deadServers.remove();
-        startRs(server);
+        try {
+          ServerName server = deadServers.remove();
+          startRs(server);
+        } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
+          // The start may fail, but it is better to just keep going even though we may lose
+          // the server: it was already removed from deadServers and is not re-queued here.
+          LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
+        }
       }
 
       sleep(RandomUtils.nextInt((int)sleepTime));
     }
   }
-}
+
+  /**
+   * Small test to ensure the class basically works.
+   * @param args command-line arguments; ignored
+   * @throws Exception if the call to perform() on the test action throws
+   */
+  public static void main(final String[] args) throws Exception {
+    RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) {
+      private int invocations = 0;
+      @Override
+      protected ServerName[] getCurrentServers() throws IOException {
+        final int count = 4;
+        List<ServerName> serverNames = new ArrayList<ServerName>(count);
+        for (int i = 0; i < 4; i++) {
+          serverNames.add(new ServerName(i + ".example.org", i, i));
+        }
+        return serverNames.toArray(new ServerName [] {});
+      }
+
+      @Override
+      protected void killRs(ServerName server) throws IOException {
+        LOG.info("Killed " + server);
+        if (this.invocations++ % 3 == 0) {
+          throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
+        }
+      }
+
+      @Override
+      protected void startRs(ServerName server) throws IOException {
+        LOG.info("Started " + server);
+        if (this.invocations++ % 3 == 0) {
+          throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
+        }
+      }
+    };
+
+    action.perform();
+  }
+}
\ No newline at end of file



Mime
View raw message