hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aengin...@apache.org
Subject hadoop git commit: HDFS-10912. Ozone:SCM: Add chill mode support to NodeManager. Contributed by Anu Engineer.
Date Thu, 13 Oct 2016 23:01:03 GMT
Repository: hadoop
Updated Branches:
  refs/heads/HDFS-7240 841742cdd -> 1fc744c6c


HDFS-10912. Ozone:SCM: Add chill mode support to NodeManager. Contributed by Anu Engineer.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1fc744c6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1fc744c6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1fc744c6

Branch: refs/heads/HDFS-7240
Commit: 1fc744c6c5bfffa9aedde5aa66c38ac47847e9a0
Parents: 841742c
Author: Anu Engineer <aengineer@apache.org>
Authored: Thu Oct 13 16:00:29 2016 -0700
Committer: Anu Engineer <aengineer@apache.org>
Committed: Thu Oct 13 16:00:29 2016 -0700

----------------------------------------------------------------------
 .../hadoop/ozone/scm/node/NodeManager.java      |  43 +++++++-
 .../hadoop/ozone/scm/node/SCMNodeManager.java   | 107 ++++++++++++++++---
 .../hadoop/ozone/scm/node/TestNodeManager.java  |  92 ++++++++++++----
 3 files changed, 204 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1fc744c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/NodeManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/NodeManager.java
index 699c789..9de6c81 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/NodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/NodeManager.java
@@ -94,18 +94,51 @@ public interface NodeManager extends Closeable, Runnable {
   List<DatanodeID> getAllNodes();
 
   /**
-   * Get the minimum number of nodes to get out of safe mode.
+   * Get the minimum number of nodes to get out of chill mode.
    *
    * @return int
    */
-  int getMinimumSafeModeNodes();
+  int getMinimumChillModeNodes();
 
   /**
-   * Reports if we have exited out of safe mode by discovering enough nodes.
+   * Reports if we have exited out of chill mode by discovering enough nodes.
    *
-   * @return True if we are out of Node layer safe mode, false otherwise.
+   * @return True if we are out of Node layer chill mode, false otherwise.
    */
-  boolean isOutOfNodeSafeMode();
+  boolean isOutOfNodeChillMode();
+
+  /**
+   * Chill mode is the period when node manager waits for a minimum
+   * configured number of datanodes to report in. This is called chill mode
+   * to indicate the period before node manager gets into action.
+   *
+   * Forcefully exits the chill mode, even if we have not met the minimum
+   * criteria of the nodes reporting in.
+   */
+  void forceExitChillMode();
+
+  /**
+   * Forcefully enters chill mode, even if all minimum node conditions are met.
+   */
+  void forceEnterChillMode();
+
+  /**
+   * Clears the manual chill mode flag.
+   */
+  void clearChillModeFlag();
+
+  /**
+   * Returns a chill mode status string.
+   * @return String
+   */
+  String getChillModeStatus();
+
+  /**
+   * Returns the status of manual chill mode flag.
+   * @return true if forceEnterChillMode has been called,
+   * false if forceExitChillMode or status is not set, e.g. clearChillModeFlag.
+   */
+  boolean isInManualChillMode();
 
   /**
    * Enum that represents the Node State. This is used in calls to getNodeList

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1fc744c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/SCMNodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/SCMNodeManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/SCMNodeManager.java
index e866dbc..da3710f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/SCMNodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/node/SCMNodeManager.java
@@ -17,6 +17,7 @@
 package org.apache.hadoop.ozone.scm.node;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.conf.Configuration;
@@ -35,7 +36,6 @@ import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -101,8 +101,9 @@ public class SCMNodeManager implements NodeManager {
   private long lastHBcheckStart;
   private long lastHBcheckFinished = 0;
   private long lastHBProcessedCount;
-  private int safeModeNodeCount;
+  private int chillModeNodeCount;
   private final int maxHBToProcessPerLoop;
+  private Optional<Boolean> inManualChillMode;
 
   /**
    * Constructs SCM machine Manager.
@@ -120,7 +121,7 @@ public class SCMNodeManager implements NodeManager {
     totalNodes = new AtomicInteger(0);
 
     // TODO: Support this value as a Percentage of known machines.
-    safeModeNodeCount = 1;
+    chillModeNodeCount = 1;
 
     staleNodeIntervalMs = OzoneClientUtils.getStaleNodeInterval(conf);
     deadNodeIntervalMs = OzoneClientUtils.getDeadNodeInterval(conf);
@@ -132,6 +133,7 @@ public class SCMNodeManager implements NodeManager {
     executorService = HadoopExecutors.newScheduledThreadPool(1,
         new ThreadFactoryBuilder().setDaemon(true)
             .setNameFormat("SCM Heartbeat Processing Thread - %d").build());
+    this.inManualChillMode =  Optional.absent();
 
     Preconditions.checkState(heartbeatCheckerIntervalMs > 0);
     executorService.schedule(this, heartbeatCheckerIntervalMs,
@@ -243,36 +245,111 @@ public class SCMNodeManager implements NodeManager {
   }
 
   /**
-   * Get the minimum number of nodes to get out of safe mode.
+   * Get the minimum number of nodes to get out of Chill mode.
    *
    * @return int
    */
   @Override
-  public int getMinimumSafeModeNodes() {
-    return safeModeNodeCount;
+  public int getMinimumChillModeNodes() {
+    return chillModeNodeCount;
   }
 
   /**
-   * Sets the Minimum SafeModeNode count, used only in testing.
+   * Sets the Minimum chill mode nodes count, used only in testing.
    *
    * @param count  - Number of nodes.
    */
   @VisibleForTesting
-  public void setMinimumSafeModeNodes(int count) {
-    safeModeNodeCount = count;
+  public void setMinimumChillModeNodes(int count) {
+    chillModeNodeCount = count;
   }
 
   /**
-   * Reports if we have exited out of safe mode.
+   * Reports if we have exited out of chill mode.
    *
-   * @return true if we are out of safe mode.
+   * @return true if we are out of chill mode.
    */
   @Override
-  public boolean isOutOfNodeSafeMode() {
-    LOG.trace("Node count : {}", totalNodes.get());
+  public boolean isOutOfNodeChillMode() {
+    if (inManualChillMode.isPresent()) {
+      return !inManualChillMode.get();
+    }
+
+    return (totalNodes.get() >= getMinimumChillModeNodes());
+  }
+
+  /**
+   * Clears the manual chill mode.
+   */
+  @Override
+  public void clearChillModeFlag() {
+    this.inManualChillMode = Optional.absent();
+  }
+
+  /**
+   * Returns chill mode Status string.
+   * @return String
+   */
+  @Override
+  public String getChillModeStatus() {
+    if (inManualChillMode.isPresent() && inManualChillMode.get()) {
+      return "Manual chill mode is set to true." +
+          getNodeStatus();
+    }
+
+    if (inManualChillMode.isPresent() && !inManualChillMode.get()) {
+      return "Manual chill mode is set to false." +
+          getNodeStatus();
+    }
+
+    if (isOutOfNodeChillMode()) {
+      return "Out of chill mode." + getNodeStatus();
+    } else {
+      return "Still in chill mode. Waiting on nodes to report in."
+          + getNodeStatus();
+    }
+  }
 
-    //TODO : Support a boolean to force getting out of Safe mode.
-    return (totalNodes.get() >= getMinimumSafeModeNodes());
+  /**
+   * Returns a node status string.
+   * @return - String
+   */
+  private String getNodeStatus() {
+    final String chillModeStatus = " %d of out of total "
+        + "%d nodes have reported in.";
+    return String.format(chillModeStatus, totalNodes.get(),
+        getMinimumChillModeNodes());
+  }
+
+  /**
+   * Returns the status of Manual chill Mode flag.
+   *
+   * @return true if forceEnterChillMode has been called, false if
+   * forceExitChillMode or status is not set, e.g. clearChillModeFlag.
+   */
+  @Override
+  public boolean isInManualChillMode() {
+    if(this.inManualChillMode.isPresent()) {
+      return this.inManualChillMode.get();
+    }
+    return false;
+  }
+
+  /**
+   * Forcefully exits the chill mode even if we have not met the minimum
+   * criteria of exiting the chill mode.
+   */
+  @Override
+  public void forceExitChillMode() {
+    this.inManualChillMode = Optional.of(false);
+  }
+
+  /**
+   * Forcefully enters chill mode, even if all chill mode conditions are met.
+   */
+  @Override
+  public void forceEnterChillMode() {
+    this.inManualChillMode = Optional.of(true);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1fc744c6/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/node/TestNodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/node/TestNodeManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/node/TestNodeManager.java
index 81fea89..3d990cd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/node/TestNodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/node/TestNodeManager.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.OzoneConfiguration;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
@@ -107,14 +108,14 @@ public class TestNodeManager {
 
   SCMNodeManager createNodeManager(Configuration config) throws IOException {
     SCMNodeManager nodeManager = new SCMNodeManager(config);
-    assertFalse("Node manager should be in safe mode",
-        nodeManager.isOutOfNodeSafeMode());
+    assertFalse("Node manager should be in chill mode",
+        nodeManager.isOutOfNodeChillMode());
     return nodeManager;
   }
 
   /**
-   * Tests that Node manager handles heartbeats correctly, and comes out of Safe
-   * Mode.
+   * Tests that Node manager handles heartbeats correctly, and comes out of
+   * chill Mode.
    *
    * @throws IOException
    * @throws InterruptedException
@@ -127,7 +128,7 @@ public class TestNodeManager {
     try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
 
       // Send some heartbeats from different nodes.
-      for (int x = 0; x < nodeManager.getMinimumSafeModeNodes(); x++) {
+      for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) {
         nodeManager.updateHeartbeat(getDatanodeID());
       }
 
@@ -136,13 +137,13 @@ public class TestNodeManager {
           4 * 1000);
 
       assertTrue("Heartbeat thread should have picked up the scheduled " +
-              "heartbeats and transitioned out of safe mode.",
-          nodeManager.isOutOfNodeSafeMode());
+              "heartbeats and transitioned out of chill mode.",
+          nodeManager.isOutOfNodeChillMode());
     }
   }
 
   /**
-   * asserts that if we send no heartbeats node manager stays in safemode.
+   * asserts that if we send no heartbeats node manager stays in chillmode.
    *
    * @throws IOException
    * @throws InterruptedException
@@ -155,13 +156,13 @@ public class TestNodeManager {
     try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
       GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
           4 * 1000);
-      assertFalse("No heartbeats, Node manager should have been in safe mode.",
-          nodeManager.isOutOfNodeSafeMode());
+      assertFalse("No heartbeats, Node manager should have been in chill mode.",
+          nodeManager.isOutOfNodeChillMode());
     }
   }
 
   /**
-   * Asserts that if we don't get enough unique nodes we stay in safemode.
+   * Asserts that if we don't get enough unique nodes we stay in chillmode.
    *
    * @throws IOException
    * @throws InterruptedException
@@ -172,13 +173,13 @@ public class TestNodeManager {
       InterruptedException, TimeoutException {
     try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
 
-      // Need 100 nodes to come out of safe mode, only one node is sending HB.
-      nodeManager.setMinimumSafeModeNodes(100);
+      // Need 100 nodes to come out of chill mode, only one node is sending HB.
+      nodeManager.setMinimumChillModeNodes(100);
       nodeManager.updateHeartbeat(getDatanodeID());
       GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
           4 * 1000);
       assertFalse("Not enough heartbeat, Node manager should have been in " +
-          "safemode.", nodeManager.isOutOfNodeSafeMode());
+          "chillmode.", nodeManager.isOutOfNodeChillMode());
     }
   }
 
@@ -195,10 +196,10 @@ public class TestNodeManager {
       InterruptedException, TimeoutException {
 
     try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
-      nodeManager.setMinimumSafeModeNodes(3);
+      nodeManager.setMinimumChillModeNodes(3);
       DatanodeID datanodeID = getDatanodeID();
 
-      // Send 10 heartbeat from same node, and assert we never leave safe mode.
+      // Send 10 heartbeat from same node, and assert we never leave chill mode.
       for (int x = 0; x < 10; x++) {
         nodeManager.updateHeartbeat(datanodeID);
       }
@@ -206,7 +207,7 @@ public class TestNodeManager {
       GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
           4 * 1000);
       assertFalse("Not enough nodes have send heartbeat to node manager.",
-          nodeManager.isOutOfNodeSafeMode());
+          nodeManager.isOutOfNodeChillMode());
     }
   }
 
@@ -234,7 +235,7 @@ public class TestNodeManager {
     Thread.sleep(2 * 1000);
 
     assertFalse("Node manager executor service is shutdown, should never exit" +
-        " safe mode", nodeManager.isOutOfNodeSafeMode());
+        " chill mode", nodeManager.isOutOfNodeChillMode());
 
     assertEquals("Assert new HBs were never processed", 0,
         nodeManager.getLastHBProcessedCount());
@@ -861,4 +862,59 @@ public class TestNodeManager {
           "counts."));
     }
   }
+
+  @Test
+  public void testScmEnterAndExistChillMode() throws IOException,
+      InterruptedException {
+    Configuration conf = getConf();
+    conf.setInt(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS, 100);
+
+    try (SCMNodeManager nodeManager = createNodeManager(conf)) {
+      nodeManager.setMinimumChillModeNodes(10);
+      nodeManager.updateHeartbeat(getDatanodeID());
+      String status = nodeManager.getChillModeStatus();
+      Assert.assertThat(status, CoreMatchers.containsString("Still in chill " +
+          "mode. Waiting on nodes to report in."));
+
+      // Should not exit chill mode since 10 nodes have not sent heartbeats yet.
+      assertFalse(nodeManager.isOutOfNodeChillMode());
+      assertFalse((nodeManager.isInManualChillMode()));
+
+      // Force exit chill mode.
+      nodeManager.forceExitChillMode();
+      assertTrue(nodeManager.isOutOfNodeChillMode());
+      status = nodeManager.getChillModeStatus();
+      Assert.assertThat(status,
+          CoreMatchers.containsString("Manual chill mode is set to false."));
+      assertFalse((nodeManager.isInManualChillMode()));
+
+
+      // Enter back into chill mode.
+      nodeManager.forceEnterChillMode();
+      assertFalse(nodeManager.isOutOfNodeChillMode());
+      status = nodeManager.getChillModeStatus();
+      Assert.assertThat(status,
+          CoreMatchers.containsString("Manual chill mode is set to true."));
+      assertTrue((nodeManager.isInManualChillMode()));
+
+
+      // Assert that node manager force enter cannot be overridden by nodes HBs.
+      for(int x= 0; x < 20; x++) {
+        nodeManager.updateHeartbeat(getDatanodeID());
+      }
+
+      Thread.sleep(500);
+      assertFalse(nodeManager.isOutOfNodeChillMode());
+
+      // Make sure that once we clear the manual chill mode flag, we fall back
+      // to the number of nodes to get out of chill mode.
+      nodeManager.clearChillModeFlag();
+      assertTrue(nodeManager.isOutOfNodeChillMode());
+      status = nodeManager.getChillModeStatus();
+      Assert.assertThat(status,
+          CoreMatchers.containsString("Out of chill mode."));
+      assertFalse(nodeManager.isInManualChillMode());
+    }
+
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message