hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From manoj...@apache.org
Subject [1/2] hadoop git commit: HDFS-9388. Decommission related code to support Maintenance State for datanodes.
Date Thu, 03 Aug 2017 00:02:41 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 45cde760b -> 065a906be


http://git-wip-us.apache.org/repos/asf/hadoop/blob/065a906b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index d51e0cb..e9f26c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -830,7 +830,8 @@ public class TestFsck {
     DatanodeDescriptor dnDesc0 = dnm.getDatanode(
         cluster.getDataNodes().get(0).getDatanodeId());
 
-    bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
+    bm.getDatanodeManager().getDatanodeAdminManager().startDecommission(
+        dnDesc0);
     final String dn0Name = dnDesc0.getXferAddr();
 
     // check the replica status while decommissioning
@@ -883,7 +884,7 @@ public class TestFsck {
         cluster.getDataNodes().get(1).getDatanodeId());
     final String dn1Name = dnDesc1.getXferAddr();
 
-    bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1,
+    bm.getDatanodeManager().getDatanodeAdminManager().startMaintenance(dnDesc1,
         Long.MAX_VALUE);
 
     // check the replica status while entering maintenance
@@ -1419,7 +1420,7 @@ public class TestFsck {
       fsn.writeUnlock();
     }
     DatanodeDescriptor dn = bc.getBlocks()[0].getDatanode(0);
-    bm.getDatanodeManager().getDecomManager().startDecommission(dn);
+    bm.getDatanodeManager().getDatanodeAdminManager().startDecommission(dn);
     String dnName = dn.getXferAddr();
 
     //wait for decommission start
@@ -1498,7 +1499,7 @@ public class TestFsck {
     DatanodeManager dnm = bm.getDatanodeManager();
     DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
         .getDatanodeId());
-    bm.getDatanodeManager().getDecomManager().startMaintenance(dn,
+    bm.getDatanodeManager().getDatanodeAdminManager().startMaintenance(dn,
         Long.MAX_VALUE);
     final String dnName = dn.getXferAddr();
 
@@ -1732,7 +1733,7 @@ public class TestFsck {
     }
     DatanodeDescriptor dn = bc.getBlocks()[0]
         .getDatanode(0);
-    bm.getDatanodeManager().getDecomManager().startDecommission(dn);
+    bm.getDatanodeManager().getDatanodeAdminManager().startDecommission(dn);
     String dnName = dn.getXferAddr();
 
     // wait for decommission start
@@ -1760,6 +1761,120 @@ public class TestFsck {
   }
 
   /**
+   * Test for blocks on maintenance hosts are not shown as missing.
+   */
+  @Test (timeout = 90000)
+  public void testFsckWithMaintenanceReplicas() throws Exception {
+    final short replFactor = 2;
+    short numDn = 2;
+    final long blockSize = 512;
+    String[] hosts = {"host1", "host2"};
+    String[] racks = {"/rack1", "/rack2"};
+
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
+        replFactor);
+
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(numDn)
+        .hosts(hosts)
+        .racks(racks)
+        .build();
+
+    assertNotNull("Failed Cluster Creation", cluster);
+    cluster.waitClusterUp();
+    final DistributedFileSystem dfs = cluster.getFileSystem();
+    assertNotNull("Failed to get FileSystem", dfs);
+
+    DFSTestUtil util = new DFSTestUtil.Builder().
+        setName(getClass().getSimpleName()).setNumFiles(1).build();
+    //create files
+    final String testFile = new String("/testfile");
+    final Path path = new Path(testFile);
+    util.createFile(dfs, path, 1024, replFactor, 1000L);
+    util.waitReplication(dfs, path, replFactor);
+    StringBuilder sb = new StringBuilder();
+    for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
+      sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
+    }
+    String[] bIds = sb.toString().split(" ");
+
+    //make sure datanode that has replica is fine before maintenance
+    String outStr = runFsck(conf, 0, true, testFile);
+    System.out.println(outStr);
+    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+    FSNamesystem fsn = cluster.getNameNode().getNamesystem();
+    BlockManager bm = fsn.getBlockManager();
+    DatanodeManager dnm = bm.getDatanodeManager();
+    DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
+        .getDatanodeId());
+    bm.getDatanodeManager().getDatanodeAdminManager().startMaintenance(dn,
+        Long.MAX_VALUE);
+    final String dnName = dn.getXferAddr();
+
+    //wait for the node to enter maintenance state
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        DatanodeInfo datanodeInfo = null;
+        try {
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dnName.equals(info.getXferAddr())) {
+              datanodeInfo = info;
+            }
+          }
+          if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
+            // verify fsck returns Healthy status
+            String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
+            assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
+        }
+        return false;
+      }
+    }, 500, 30000);
+
+    // Start 3rd DataNode and wait for node to reach in maintenance state
+    cluster.startDataNodes(conf, 1, true, null,
+        new String[] {"/rack3"}, new String[] {"host3"}, null, false);
+
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        DatanodeInfo datanodeInfo = null;
+        try {
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dnName.equals(info.getXferAddr())) {
+              datanodeInfo = info;
+            }
+          }
+          if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
+        }
+        return false;
+      }
+    }, 500, 30000);
+
+    // verify fsck returns Healthy status
+    String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
+    assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
+
+    // verify fsck returns Healthy status even without maintenance option
+    fsckOut = runFsck(conf, 0, true, testFile);
+    assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
+  }
+
+  /**
    * Test that corrupted snapshot files are listed with full dir.
    */
   @Test

http://git-wip-us.apache.org/repos/asf/hadoop/blob/065a906b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
index 32c245c..8250ea6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import com.google.common.base.Supplier;
 import com.google.common.util.concurrent.Uninterruptibles;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hdfs.util.HostsFileWriter;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator;
 import org.apache.hadoop.net.ServerSocketUtil;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.VersionInfo;
 import org.codehaus.jackson.map.ObjectMapper;
@@ -313,6 +315,106 @@ public class TestNameNodeMXBean {
   }
 
   @Test (timeout = 120000)
+  public void testDecommissioningNodes() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 30);
+    MiniDFSCluster cluster = null;
+    HostsFileWriter hostsFileWriter = new HostsFileWriter();
+    hostsFileWriter.initialize(conf, "temp/TestNameNodeMXBean");
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+
+      final FSNamesystem fsn = cluster.getNameNode().namesystem;
+      final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      final ObjectName mxbeanName = new ObjectName(
+          "Hadoop:service=NameNode,name=NameNodeInfo");
+
+      List<String> hosts = new ArrayList<>();
+      for(DataNode dn : cluster.getDataNodes()) {
+        hosts.add(dn.getDisplayName());
+      }
+      hostsFileWriter.initIncludeHosts(hosts.toArray(
+          new String[hosts.size()]));
+      fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+      // 1. Verify Live nodes
+      String liveNodesInfo = (String) (mbs.getAttribute(mxbeanName,
+          "LiveNodes"));
+      Map<String, Map<String, Object>> liveNodes =
+          (Map<String, Map<String, Object>>) JSON.parse(liveNodesInfo);
+      assertEquals(fsn.getLiveNodes(), liveNodesInfo);
+      assertEquals(fsn.getNumLiveDataNodes(), liveNodes.size());
+
+      for (Map<String, Object> liveNode : liveNodes.values()) {
+        assertTrue(liveNode.containsKey("lastContact"));
+        assertTrue(liveNode.containsKey("xferaddr"));
+      }
+
+      // Add the 1st DataNode to Decommission list
+      hostsFileWriter.initExcludeHost(
+          cluster.getDataNodes().get(0).getDisplayName());
+      fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+      // Wait for the DatanodeAdminManager to complete refresh nodes
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          try {
+            String decomNodesInfo = (String) (mbs.getAttribute(mxbeanName,
+                "DecomNodes"));
+            Map<String, Map<String, Object>> decomNodes =
+                (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
+            if (decomNodes.size() > 0) {
+              return true;
+            }
+          } catch (Exception e) {
+            return false;
+          }
+          return false;
+        }
+      }, 1000, 60000);
+
+      // 2. Verify Decommission InProgress nodes
+      String decomNodesInfo = (String) (mbs.getAttribute(mxbeanName,
+          "DecomNodes"));
+      Map<String, Map<String, Object>> decomNodes =
+          (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
+      assertEquals(fsn.getDecomNodes(), decomNodesInfo);
+      assertEquals(fsn.getNumDecommissioningDataNodes(), decomNodes.size());
+      assertEquals(0, fsn.getNumDecomLiveDataNodes());
+      assertEquals(0, fsn.getNumDecomDeadDataNodes());
+
+      // Wait for the DatanodeAdminManager to complete check
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          if (fsn.getNumDecomLiveDataNodes() == 1) {
+            return true;
+          }
+          return false;
+        }
+      }, 1000, 60000);
+
+      // 3. Verify Decommissioned nodes
+      decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
+      decomNodes =
+          (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
+      assertEquals(0, decomNodes.size());
+      assertEquals(fsn.getDecomNodes(), decomNodesInfo);
+      assertEquals(1, fsn.getNumDecomLiveDataNodes());
+      assertEquals(0, fsn.getNumDecomDeadDataNodes());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      hostsFileWriter.cleanup();
+    }
+  }
+
+  @Test (timeout = 120000)
   public void testMaintenanceNodes() throws Exception {
     LOG.info("Starting testMaintenanceNodes");
     int expirationInMs = 30 * 1000;
@@ -387,7 +489,7 @@ public class TestNameNodeMXBean {
         assertEquals(0, fsn.getNumInMaintenanceDeadDataNodes());
       }
 
-      // Wait for the DecommissionManager to complete check
+      // Wait for the DatanodeAdminManager to complete check
       // and perform state transition
       while (fsn.getNumInMaintenanceLiveDataNodes() != 1) {
         Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/065a906b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
index c9fe2c3..b7f0cfc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
@@ -349,18 +349,18 @@ public class TestNamenodeCapacityReport {
   private void startDecommissionOrMaintenance(DatanodeManager dnm,
       DatanodeDescriptor dnd, boolean decomm) {
     if (decomm) {
-      dnm.getDecomManager().startDecommission(dnd);
+      dnm.getDatanodeAdminManager().startDecommission(dnd);
     } else {
-      dnm.getDecomManager().startMaintenance(dnd, Long.MAX_VALUE);
+      dnm.getDatanodeAdminManager().startMaintenance(dnd, Long.MAX_VALUE);
     }
   }
 
   private void stopDecommissionOrMaintenance(DatanodeManager dnm,
       DatanodeDescriptor dnd, boolean decomm) {
     if (decomm) {
-      dnm.getDecomManager().stopDecommission(dnd);
+      dnm.getDatanodeAdminManager().stopDecommission(dnd);
     } else {
-      dnm.getDecomManager().stopMaintenance(dnd);
+      dnm.getDatanodeAdminManager().stopMaintenance(dnd);
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message