hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From l..@apache.org
Subject hadoop git commit: HDFS-11259. Update fsck to display maintenance state info. (Manoj Govindassamy via lei)
Date Thu, 19 Jan 2017 08:25:35 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 541efe18c -> 1cc5f460e


HDFS-11259. Update fsck to display maintenance state info. (Manoj Govindassamy via lei)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1cc5f460
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1cc5f460
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1cc5f460

Branch: refs/heads/branch-2
Commit: 1cc5f460edf68e04dd1972f7c1d00077e8bdd5de
Parents: 541efe1
Author: Lei Xu <lei@apache.org>
Authored: Thu Jan 19 16:24:58 2017 +0800
Committer: Lei Xu <lei@apache.org>
Committed: Thu Jan 19 16:24:58 2017 +0800

----------------------------------------------------------------------
 .../hdfs/server/namenode/NamenodeFsck.java      |  58 +++-
 .../org/apache/hadoop/hdfs/tools/DFSck.java     |   9 +-
 .../src/site/markdown/HDFSCommands.md           |   3 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 271 ++++++++++++++++---
 4 files changed, 293 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cc5f460/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 1a2deb0..7c9913a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -116,6 +116,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
   public static final String HEALTHY_STATUS = "is HEALTHY";
   public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
   public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
+  public static final String ENTERING_MAINTENANCE_STATUS =
+      "is ENTERING MAINTENANCE";
+  public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
   public static final String NONEXISTENT_STATUS = "does not exist";
   public static final String FAILURE_STATUS = "FAILED";
   public static final String UNDEFINED = "undefined";
@@ -138,6 +141,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
 
   private boolean showReplicaDetails = false;
   private boolean showUpgradeDomains = false;
+  private boolean showMaintenanceState = false;
   private long staleInterval;
   private Tracer tracer;
 
@@ -220,6 +224,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         this.showReplicaDetails = true;
       } else if (key.equals("upgradedomains")) {
         this.showUpgradeDomains = true;
+      } else if (key.equals("maintenance")) {
+        this.showMaintenanceState = true;
       } else if (key.equals("storagepolicies")) {
         this.showStoragePolcies = true;
       } else if (key.equals("openforwrite")) {
@@ -271,6 +277,12 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
           + numberReplicas.decommissioned());
       out.println("No. of decommissioning Replica: "
           + numberReplicas.decommissioning());
+      if (this.showMaintenanceState) {
+        out.println("No. of entering maintenance Replica: "
+            + numberReplicas.liveEnteringMaintenanceReplicas());
+        out.println("No. of in maintenance Replica: "
+            + numberReplicas.maintenanceNotForReadReplicas());
+      }
       out.println("No. of corrupted Replica: " +
           numberReplicas.corruptReplicas());
       //record datanodes that have corrupted block replica
@@ -291,6 +303,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
           out.print(DECOMMISSIONED_STATUS);
         } else if (dn.isDecommissionInProgress()) {
           out.print(DECOMMISSIONING_STATUS);
+        } else if (this.showMaintenanceState && dn.isEnteringMaintenance()) {
+          out.print(ENTERING_MAINTENANCE_STATUS);
+        } else if (this.showMaintenanceState && dn.isInMaintenance()) {
+          out.print(IN_MAINTENANCE_STATUS);
         } else {
           out.print(HEALTHY_STATUS);
         }
@@ -567,13 +583,21 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       NumberReplicas numberReplicas = bm.countNodes(storedBlock);
       int decommissionedReplicas = numberReplicas.decommissioned();;
       int decommissioningReplicas = numberReplicas.decommissioning();
+      int enteringMaintenanceReplicas =
+          numberReplicas.liveEnteringMaintenanceReplicas();
+      int inMaintenanceReplicas =
+          numberReplicas.maintenanceNotForReadReplicas();
       res.decommissionedReplicas +=  decommissionedReplicas;
       res.decommissioningReplicas += decommissioningReplicas;
+      res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
+      res.inMaintenanceReplicas += inMaintenanceReplicas;
 
       // count total replicas
       int liveReplicas = numberReplicas.liveReplicas();
-      int totalReplicasPerBlock = liveReplicas + decommissionedReplicas +
-          decommissioningReplicas;
+      int totalReplicasPerBlock = liveReplicas + decommissionedReplicas
+          + decommissioningReplicas
+          + enteringMaintenanceReplicas
+          + inMaintenanceReplicas;
       res.totalReplicas += totalReplicasPerBlock;
 
       // count expected replicas
@@ -612,12 +636,14 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         if (!showFiles) {
           out.print("\n" + path + ": ");
         }
-        out.println(" Under replicated " + block +
-                    ". Target Replicas is " +
-                    targetFileReplication + " but found " +
-                    liveReplicas + " live replica(s), " +
-                    decommissionedReplicas + " decommissioned replica(s) and " +
-                    decommissioningReplicas + " decommissioning replica(s).");
+        out.println(" Under replicated " + block + ". Target Replicas is "
+            + targetFileReplication + " but found "
+            + liveReplicas+ " live replica(s), "
+            + decommissionedReplicas + " decommissioned replica(s), "
+            + decommissioningReplicas + " decommissioning replica(s)"
+            + (this.showMaintenanceState ? (enteringMaintenanceReplicas
+            + ", entering maintenance replica(s) and " + inMaintenanceReplicas
+            + " in maintenance replica(s).") : "."));
       }
 
       // count mis replicated blocks
@@ -678,6 +704,12 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
                 sb.append("DECOMMISSIONED)");
               } else if (dnDesc.isDecommissionInProgress()) {
                 sb.append("DECOMMISSIONING)");
+              } else if (this.showMaintenanceState &&
+                  dnDesc.isEnteringMaintenance()) {
+                sb.append("ENTERING MAINTENANCE)");
+              } else if (this.showMaintenanceState &&
+                  dnDesc.isInMaintenance()) {
+                sb.append("IN MAINTENANCE)");
               } else if (corruptReplicas != null && corruptReplicas.contains(dnDesc)) {
                 sb.append("CORRUPT)");
               } else if (blocksExcess != null && blocksExcess.contains(block.getLocalBlock())) {
@@ -991,6 +1023,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     long missingReplicas = 0L;
     long decommissionedReplicas = 0L;
     long decommissioningReplicas = 0L;
+    long enteringMaintenanceReplicas = 0L;
+    long inMaintenanceReplicas = 0L;
     long numUnderMinReplicatedBlocks=0L;
     long numOverReplicatedBlocks = 0L;
     long numUnderReplicatedBlocks = 0L;
@@ -1133,6 +1167,14 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         res.append("\n DecommissioningReplicas:\t").append(
             decommissioningReplicas);
       }
+      if (enteringMaintenanceReplicas > 0) {
+        res.append("\n EnteringMaintenanceReplicas:\t").append(
+            enteringMaintenanceReplicas);
+      }
+      if (inMaintenanceReplicas > 0) {
+        res.append("\n InMaintenanceReplicas:\t").append(
+            inMaintenanceReplicas);
+      }
       return res.toString();
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cc5f460/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
index dd6b5d2..199f459 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
@@ -80,7 +80,7 @@ public class DFSck extends Configured implements Tool {
       + "[-files [-blocks [-locations | -racks | -replicaDetails | " +
           "-upgradedomains]]]] "
       + "[-includeSnapshots] "
-      + "[-storagepolicies] [-blockId <blk_Id>]\n"
+      + "[-storagepolicies] [-maintenance] [-blockId <blk_Id>]\n"
       + "\t<path>\tstart checking from this path\n"
       + "\t-move\tmove corrupted files to /lost+found\n"
       + "\t-delete\tdelete corrupted files\n"
@@ -99,6 +99,7 @@ public class DFSck extends Configured implements Tool {
       + "\t-files -blocks -upgradedomains\tprint out upgrade domains for " +
           "every block\n"
       + "\t-storagepolicies\tprint out storage policy summary for the blocks\n"
+      + "\t-maintenance\tprint out maintenance state node details\n"
       + "\t-blockId\tprint out which file this blockId belongs to, locations"
       + " (nodes, racks) of this block, and other diagnostics info"
       + " (under replicated, corrupted or not, etc)\n\n"
@@ -283,6 +284,8 @@ public class DFSck extends Configured implements Tool {
         doListCorruptFileBlocks = true;
       } else if (args[idx].equals("-includeSnapshots")) {
         url.append("&includeSnapshots=1");
+      } else if (args[idx].equals("-maintenance")) {
+        url.append("&maintenance=1");
       } else if (args[idx].equals("-blockId")) {
         StringBuilder sb = new StringBuilder();
         idx++;
@@ -369,6 +372,10 @@ public class DFSck extends Configured implements Tool {
       errCode = 2;
     } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
       errCode = 3;
+    } else if (lastLine.endsWith(NamenodeFsck.IN_MAINTENANCE_STATUS))  {
+      errCode = 4;
+    } else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
+      errCode = 5;
     }
     return errCode;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cc5f460/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
index 2004d02..a276ff9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
@@ -112,7 +112,7 @@ Usage:
               [-move | -delete | -openforwrite]
               [-files [-blocks [-locations | -racks | -replicaDetails | -upgradedomains]]]
               [-includeSnapshots]
-              [-storagepolicies] [-blockId <blk_Id>]
+              [-storagepolicies] [-maintenance] [-blockId <blk_Id>]
 
 | COMMAND\_OPTION | Description |
 |:---- |:---- |
@@ -129,6 +129,7 @@ Usage:
 | `-move` | Move corrupted files to /lost+found. |
 | `-openforwrite` | Print out files opened for write. |
 | `-storagepolicies` | Print out storage policy summary for the blocks. |
+| `-maintenance` | Print out maintenance state node details. |
 | `-blockId` | Print out information about the block. |
 
 Runs the HDFS filesystem checking utility. See [fsck](./HdfsUserGuide.html#fsck) for more info.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cc5f460/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 7723e00..c419933 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -52,6 +52,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -155,11 +156,11 @@ public class TestFsck {
     PrintStream out = new PrintStream(bStream, true);
     GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL);
     int errCode = ToolRunner.run(new DFSck(conf, out), path);
+    LOG.info("OUTPUT = " + bStream.toString());
     if (checkErrorCode) {
       assertEquals(expectedErrCode, errCode);
     }
     GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
-    LOG.info("OUTPUT = " + bStream.toString());
     return bStream.toString();
   }
 
@@ -787,26 +788,24 @@ public class TestFsck {
     assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
   }
 
-  @Test(timeout = 60000)
+  @Test(timeout = 90000)
   public void testFsckReplicaDetails() throws Exception {
 
     final short replFactor = 1;
     short numDn = 1;
     final long blockSize = 512;
     final long fileSize = 1024;
-    boolean checkDecommissionInProgress = false;
     String[] racks = {"/rack1"};
     String[] hosts = {"host1"};
 
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
     conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
 
-    DistributedFileSystem dfs;
     cluster =
         new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts)
             .racks(racks).build();
     cluster.waitClusterUp();
-    dfs = cluster.getFileSystem();
+    final DistributedFileSystem dfs = cluster.getFileSystem();
 
     // create files
     final String testFile = new String("/testfile");
@@ -815,53 +814,132 @@ public class TestFsck {
     DFSTestUtil.waitReplication(dfs, path, replFactor);
 
     // make sure datanode that has replica is fine before decommission
-    String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
-        "-replicaDetails");
+    String fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-maintenance", "-blocks", "-replicaDetails");
     assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
     assertTrue(fsckOut.contains("(LIVE)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
 
     // decommission datanode
-    ExtendedBlock eb = DFSTestUtil.getFirstBlock(dfs, path);
     FSNamesystem fsn = cluster.getNameNode().getNamesystem();
     BlockManager bm = fsn.getBlockManager();
-    BlockCollection bc = null;
-    try {
-      fsn.writeLock();
-      BlockInfo bi = bm.getStoredBlock(eb.getLocalBlock());
-      bc = fsn.getBlockCollection(bi);
-    } finally {
-      fsn.writeUnlock();
-    }
-    DatanodeDescriptor dn = bc.getBlocks()[0]
-        .getDatanode(0);
-    bm.getDatanodeManager().getDecomManager().startDecommission(dn);
-    String dnName = dn.getXferAddr();
+    final DatanodeManager dnm = bm.getDatanodeManager();
+    DatanodeDescriptor dnDesc0 = dnm.getDatanode(
+        cluster.getDataNodes().get(0).getDatanodeId());
+
+    bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
+    final String dn0Name = dnDesc0.getXferAddr();
 
     // check the replica status while decommissioning
-    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
-        "-replicaDetails");
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-maintenance", "-blocks", "-replicaDetails");
     assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
 
-    // Start 2nd Datanode and wait for decommission to start
-    cluster.startDataNodes(conf, 1, true, null, null, null);
-    DatanodeInfo datanodeInfo = null;
-    do {
-      Thread.sleep(2000);
-      for (DatanodeInfo info : dfs.getDataNodeStats()) {
-        if (dnName.equals(info.getXferAddr())) {
-          datanodeInfo = info;
+    // Start 2nd DataNode
+    cluster.startDataNodes(conf, 1, true, null,
+        new String[] {"/rack2"}, new String[] {"host2"}, null, false);
+
+    // Wait for decommission to start
+    final AtomicBoolean checkDecommissionInProgress =
+        new AtomicBoolean(false);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        DatanodeInfo datanodeInfo = null;
+        try {
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dn0Name.equals(info.getXferAddr())) {
+              datanodeInfo = info;
+            }
+          }
+          if (!checkDecommissionInProgress.get() && datanodeInfo != null
+              && datanodeInfo.isDecommissionInProgress()) {
+            checkDecommissionInProgress.set(true);
+          }
+          if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
         }
+        return false;
       }
-      if (!checkDecommissionInProgress && datanodeInfo != null
-          && datanodeInfo.isDecommissionInProgress()) {
-        checkDecommissionInProgress = true;
+    }, 500, 30000);
+
+    // check the replica status after decommission is done
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-maintenance", "-blocks", "-replicaDetails");
+    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
+
+    DatanodeDescriptor dnDesc1 = dnm.getDatanode(
+        cluster.getDataNodes().get(1).getDatanodeId());
+    final String dn1Name = dnDesc1.getXferAddr();
+
+    bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1,
+        Long.MAX_VALUE);
+
+    // check the replica status while entering maintenance
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-maintenance", "-blocks", "-replicaDetails");
+    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
+
+    // check entering maintenance replicas are printed only when requested
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-blocks", "-replicaDetails");
+    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
+
+
+    // Start 3rd DataNode
+    cluster.startDataNodes(conf, 1, true, null,
+        new String[] {"/rack3"}, new String[] {"host3"}, null, false);
+
+    // Wait for the 2nd node to reach in maintenance state
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        DatanodeInfo dnInfo = null;
+        try {
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dn1Name.equals(info.getXferAddr())) {
+              dnInfo = info;
+            }
+          }
+          if (dnInfo != null && dnInfo.isInMaintenance()) {
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
+        }
+        return false;
       }
-    } while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
+    }, 500, 30000);
 
     // check the replica status after decommission is done
-    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
-        "-replicaDetails");
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-maintenance", "-blocks", "-replicaDetails");
+    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
+
+    // check in maintenance replicas are not printed when not requested
+    fsckOut = runFsck(conf, 0, true, testFile, "-files",
+        "-blocks", "-replicaDetails");
     assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
+    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
+
+
   }
 
   /** Test if fsck can return -1 in case of failure.
@@ -1367,6 +1445,124 @@ public class TestFsck {
   }
 
   /**
+   * Test for blockIdCK with datanode maintenance.
+   */
+  @Test (timeout = 90000)
+  public void testBlockIdCKMaintenance() throws Exception {
+    final short replFactor = 2;
+    short numDn = 2;
+    final long blockSize = 512;
+    String[] hosts = {"host1", "host2"};
+    String[] racks = {"/rack1", "/rack2"};
+
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
+        replFactor);
+
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(numDn)
+        .hosts(hosts)
+        .racks(racks)
+        .build();
+
+    assertNotNull("Failed Cluster Creation", cluster);
+    cluster.waitClusterUp();
+    final DistributedFileSystem dfs = cluster.getFileSystem();
+    assertNotNull("Failed to get FileSystem", dfs);
+
+    DFSTestUtil util = new DFSTestUtil.Builder().
+        setName(getClass().getSimpleName()).setNumFiles(1).build();
+    //create files
+    final String pathString = new String("/testfile");
+    final Path path = new Path(pathString);
+    util.createFile(dfs, path, 1024, replFactor, 1000L);
+    util.waitReplication(dfs, path, replFactor);
+    StringBuilder sb = new StringBuilder();
+    for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
+      sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
+    }
+    final String[] bIds = sb.toString().split(" ");
+
+    //make sure datanode that has replica is fine before maintenance
+    String outStr = runFsck(conf, 0, true, "/",
+        "-maintenance", "-blockId", bIds[0]);
+    System.out.println(outStr);
+    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+    FSNamesystem fsn = cluster.getNameNode().getNamesystem();
+    BlockManager bm = fsn.getBlockManager();
+    DatanodeManager dnm = bm.getDatanodeManager();
+    DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
+        .getDatanodeId());
+    bm.getDatanodeManager().getDecomManager().startMaintenance(dn,
+        Long.MAX_VALUE);
+    final String dnName = dn.getXferAddr();
+
+    //wait for the node to enter maintenance state
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        DatanodeInfo datanodeInfo = null;
+        try {
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dnName.equals(info.getXferAddr())) {
+              datanodeInfo = info;
+            }
+          }
+          if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
+            String fsckOut = runFsck(conf, 5, false, "/",
+                "-maintenance", "-blockId", bIds[0]);
+            assertTrue(fsckOut.contains(
+                NamenodeFsck.ENTERING_MAINTENANCE_STATUS));
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
+        }
+        return false;
+      }
+    }, 500, 30000);
+
+    // Start 3rd DataNode
+    cluster.startDataNodes(conf, 1, true, null,
+        new String[] {"/rack3"}, new String[] {"host3"}, null, false);
+
+    // Wait for 1st node to reach in maintenance state
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        try {
+          DatanodeInfo datanodeInfo = null;
+          for (DatanodeInfo info : dfs.getDataNodeStats()) {
+            if (dnName.equals(info.getXferAddr())) {
+              datanodeInfo = info;
+            }
+          }
+          if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
+            return true;
+          }
+        } catch (Exception e) {
+          LOG.warn("Unexpected exception: " + e);
+          return false;
+        }
+        return false;
+      }
+    }, 500, 30000);
+
+    //check in maintenance node
+    String fsckOut = runFsck(conf, 4, false, "/",
+        "-maintenance", "-blockId", bIds[0]);
+    assertTrue(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
+
+    //check in maintenance node are not printed when not requested
+    fsckOut = runFsck(conf, 4, false, "/", "-blockId", bIds[0]);
+    assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
+  }
+
+  /**
    * Test for blockIdCK with block corruption.
    */
   @Test
@@ -1385,14 +1581,13 @@ public class TestFsck {
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
     conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
 
-    DistributedFileSystem dfs = null;
     cluster =
         new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts)
             .racks(racks).build();
 
     assertNotNull("Failed Cluster Creation", cluster);
     cluster.waitClusterUp();
-    dfs = cluster.getFileSystem();
+    final DistributedFileSystem dfs = cluster.getFileSystem();
     assertNotNull("Failed to get FileSystem", dfs);
 
     DFSTestUtil util = new DFSTestUtil.Builder().


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message