hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ndimi...@apache.org
Subject git commit: HBASE-12403 IntegrationTestMTTR flaky due to aggressive RS restart timeout
Date Sat, 01 Nov 2014 17:41:50 GMT
Repository: hbase
Updated Branches:
  refs/heads/master a1f59d8e1 -> 3c06b4818


HBASE-12403 IntegrationTestMTTR flaky due to aggressive RS restart timeout


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3c06b481
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3c06b481
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3c06b481

Branch: refs/heads/master
Commit: 3c06b48181e22eb4ce91d6d8a455a1617f13d85f
Parents: a1f59d8
Author: Nick Dimiduk <ndimiduk@apache.org>
Authored: Fri Oct 31 16:34:48 2014 -0700
Committer: Nick Dimiduk <ndimiduk@apache.org>
Committed: Sat Nov 1 10:34:59 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/chaos/actions/Action.java      | 32 +++++++++++++++++---
 .../hadoop/hbase/mttr/IntegrationTestMTTR.java  |  4 +++
 2 files changed, 32 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3c06b481/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java
index dea412f..c01ce0f 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java
@@ -43,19 +43,43 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 public class Action {
 
+  public static final String KILL_MASTER_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.killmastertimeout";
+  public static final String START_MASTER_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.startmastertimeout";
+  public static final String KILL_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.killrstimeout";
+  public static final String START_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.startrstimeout";
+
   protected static Log LOG = LogFactory.getLog(Action.class);
 
+  protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long START_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+
   protected ActionContext context;
   protected HBaseCluster cluster;
   protected ClusterStatus initialStatus;
   protected ServerName[] initialServers;
 
+  protected long killMasterTimeout;
+  protected long startMasterTimeout;
+  protected long killRsTimeout;
+  protected long startRsTimeout;
+
   public void init(ActionContext context) throws IOException {
     this.context = context;
     cluster = context.getHBaseCluster();
     initialStatus = cluster.getInitialClusterStatus();
     Collection<ServerName> regionServers = initialStatus.getServers();
     initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
+
+    killMasterTimeout = cluster.getConf().getLong(KILL_MASTER_TIMEOUT_KEY,
+        KILL_MASTER_TIMEOUT_DEFAULT);
+    startMasterTimeout = cluster.getConf().getLong(START_MASTER_TIMEOUT_KEY,
+        START_MASTER_TIMEOUT_DEFAULT);
+    killRsTimeout = cluster.getConf().getLong(KILL_RS_TIMEOUT_KEY, KILL_RS_TIMEOUT_DEFAULT);
+    startRsTimeout = cluster.getConf().getLong(START_RS_TIMEOUT_KEY, START_RS_TIMEOUT_DEFAULT);
   }
 
   public void perform() throws Exception { }
@@ -84,21 +108,21 @@ public class Action {
   protected void killMaster(ServerName server) throws IOException {
     LOG.info("Killing master:" + server);
     cluster.killMaster(server);
-    cluster.waitForMasterToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
+    cluster.waitForMasterToStop(server, killMasterTimeout);
     LOG.info("Killed master server:" + server);
   }
 
   protected void startMaster(ServerName server) throws IOException {
     LOG.info("Starting master:" + server.getHostname());
     cluster.startMaster(server.getHostname());
-    cluster.waitForActiveAndReadyMaster(PolicyBasedChaosMonkey.TIMEOUT);
+    cluster.waitForActiveAndReadyMaster(startMasterTimeout);
     LOG.info("Started master: " + server);
   }
 
   protected void killRs(ServerName server) throws IOException {
     LOG.info("Killing region server:" + server);
     cluster.killRegionServer(server);
-    cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
+    cluster.waitForRegionServerToStop(server, killRsTimeout);
     LOG.info("Killed region server:" + server + ". Reported num of rs:"
         + cluster.getClusterStatus().getServersSize());
   }
@@ -106,7 +130,7 @@ public class Action {
   protected void startRs(ServerName server) throws IOException {
     LOG.info("Starting region server:" + server.getHostname());
     cluster.startRegionServer(server.getHostname());
-    cluster.waitForRegionServerToStart(server.getHostname(), PolicyBasedChaosMonkey.TIMEOUT);
+    cluster.waitForRegionServerToStart(server.getHostname(), startRsTimeout);
     LOG.info("Started region server:" + server + ". Reported num of rs:"
         + cluster.getClusterStatus().getServersSize());
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/3c06b481/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
index 1484873..12adc80 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
@@ -183,6 +183,10 @@ public class IntegrationTestMTTR {
   }
 
   private static void setupActions() throws IOException {
+    // allow a little more time for RS restart actions because RS start depends on having a master
+    // to report to and the master is also being monkeyed.
+    util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
+
     // Set up the action that will restart a region server holding a region from our table
     // because this table should only have one region we should be good.
     restartRSAction = new RestartRsHoldingTableAction(sleepTime, tableName.getNameAsString());


Mime
View raw message