hadoop-common-commits mailing list archives

From fab...@apache.org
Subject [19/50] [abbrv] hadoop git commit: HDFS-11031. Add additional unit test for DataNode startup behavior when volumes fail. Contributed by Mingliang Liu.
Date Mon, 07 Nov 2016 00:54:30 GMT
HDFS-11031. Add additional unit test for DataNode startup behavior when volumes fail. Contributed
by Mingliang Liu.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cb5cc0dc
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cb5cc0dc
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cb5cc0dc

Branch: refs/heads/HADOOP-13345
Commit: cb5cc0dc53d3543e13b7b7cf9425780ded0538cc
Parents: fc2b69e
Author: Brahma Reddy Battula <brahma@apache.org>
Authored: Wed Nov 2 10:35:47 2016 +0530
Committer: Brahma Reddy Battula <brahma@apache.org>
Committed: Wed Nov 2 10:35:47 2016 +0530

----------------------------------------------------------------------
 .../datanode/TestDataNodeVolumeFailure.java     | 111 ++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5cc0dc/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 4aba4e3..49cb513 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -73,17 +73,24 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.GenericTestUtils;
 
-import com.google.common.base.Supplier;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.filefilter.TrueFileFilter;
+
+import com.google.common.base.Supplier;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
  * Fine-grain testing of block files and locations after volume failure.
  */
 public class TestDataNodeVolumeFailure {
+  private final static Logger LOG = LoggerFactory.getLogger(
+      TestDataNodeVolumeFailure.class);
   final private int block_size = 512;
   MiniDFSCluster cluster = null;
   private Configuration conf;
@@ -414,6 +421,108 @@ public class TestDataNodeVolumeFailure {
   }
 
   /**
+   * Test that the DataNode fails to start when there is a volume failure.
+   *
+   * We fail a volume by setting the parent directory non-writable.
+   */
+  @Test (timeout = 120000)
+  public void testDataNodeFailToStartWithVolumeFailure() throws Exception {
+    // Method to simulate volume failures is currently not supported on Windows.
+    assumeNotWindows();
+
+    failedDir = new File(dataDir, "failedDir");
+    assertTrue("Failed to fail a volume by setting it non-writable",
+        failedDir.mkdir() && failedDir.setReadOnly());
+
+    startNewDataNodeWithDiskFailure(new File(failedDir, "newDir1"), false);
+  }
+
+  /**
+   * DataNode will start and tolerate one failing disk according to config.
+   *
+   * We fail a volume by setting the parent directory non-writable.
+   */
+  @Test (timeout = 120000)
+  public void testDNStartAndTolerateOneVolumeFailure() throws Exception {
+    // Method to simulate volume failures is currently not supported on Windows.
+    assumeNotWindows();
+
+    failedDir = new File(dataDir, "failedDir");
+    assertTrue("Failed to fail a volume by setting it non-writable",
+        failedDir.mkdir() && failedDir.setReadOnly());
+
+    startNewDataNodeWithDiskFailure(new File(failedDir, "newDir1"), true);
+  }
+
+  /**
+   * Test that the DataNode won't start if the data directory is not readable/writable.
+   */
+  @Test (timeout = 120000)
+  public void testDNFailToStartWithDataDirNonWritable() throws Exception {
+    // Method to simulate volume failures is currently not supported on Windows.
+    assumeNotWindows();
+
+    final File readOnlyDir = new File(dataDir, "nonWritable");
+    assertTrue("Set the data dir permission non-writable",
+        readOnlyDir.mkdir() && readOnlyDir.setReadOnly());
+
+    startNewDataNodeWithDiskFailure(new File(readOnlyDir, "newDir1"), false);
+  }
+
+  /**
+   * DataNode will start and tolerate one non-writable data directory
+   * according to config.
+   */
+  @Test (timeout = 120000)
+  public void testDNStartAndTolerateOneDataDirNonWritable() throws Exception {
+    // Method to simulate volume failures is currently not supported on Windows.
+    assumeNotWindows();
+
+    final File readOnlyDir = new File(dataDir, "nonWritable");
+    assertTrue("Set the data dir permission non-writable",
+        readOnlyDir.mkdir() && readOnlyDir.setReadOnly());
+    startNewDataNodeWithDiskFailure(new File(readOnlyDir, "newDir1"), true);
+  }
+
+  /**
+   * @param badDataDir bad data dir, either disk failure or non-writable
+   * @param tolerated true if one volume failure is allowed else false
+   */
+  private void startNewDataNodeWithDiskFailure(File badDataDir,
+      boolean tolerated) throws Exception {
+    final File data5 = new File(dataDir, "data5");
+    final String newDirs = badDataDir.toString() + "," + data5.toString();
+    final Configuration newConf = new Configuration(conf);
+    newConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs);
+    LOG.info("Setting dfs.datanode.data.dir for new DataNode as {}", newDirs);
+    newConf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
+        tolerated ? 1 : 0);
+
+    // bring up one more DataNode
+    assertEquals(repl, cluster.getDataNodes().size());
+    cluster.startDataNodes(newConf, 1, false, null, null);
+    assertEquals(repl + 1, cluster.getDataNodes().size());
+
+    if (tolerated) {
+      // create new file and it should be able to replicate to 3 nodes
+      final Path p = new Path("/test1.txt");
+      DFSTestUtil.createFile(fs, p, block_size * blocks_num, (short) 3, 1L);
+      DFSTestUtil.waitReplication(fs, p, (short) (repl + 1));
+    } else {
+      // DataNode should stop soon if it does not tolerate disk failure
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          final String bpid = cluster.getNamesystem().getBlockPoolId();
+          final BPOfferService bpos = cluster.getDataNodes().get(2)
+              .getBPOfferService(bpid);
+          return !bpos.isAlive();
+        }
+      }, 100, 30 * 1000);
+    }
+  }
+
+  /**
    * verifies two things:
    *  1. number of locations of each block in the name node
    *   matches number of actual files

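For readers skimming the patch, the new tests share one setup pattern: "fail" a volume by making its parent directory read-only, point dfs.datanode.data.dir at that bad volume plus one healthy volume, and set dfs.datanode.failed.volumes.tolerated to 0 or 1 to choose whether the DataNode should abort or start. A minimal standalone sketch of that configuration step (not part of the commit; directory names are illustrative) looks like this:

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

/**
 * Illustrative sketch only (not from the commit): builds the same kind of
 * configuration the new tests hand to the extra DataNode.
 */
public class VolumeFailureConfSketch {
  public static Configuration buildConf(File baseDir, boolean tolerateOneFailure)
      throws Exception {
    // "Fail" one volume up front: a read-only parent means the DataNode
    // cannot create or write the volume directory underneath it.
    // (This trick does not work on Windows, hence assumeNotWindows() in the tests.)
    File failedParent = new File(baseDir, "failedDir");
    if (!(failedParent.mkdirs() && failedParent.setReadOnly())) {
      throw new IllegalStateException("could not set up read-only parent dir");
    }
    File badVolume = new File(failedParent, "newDir1");
    File goodVolume = new File(baseDir, "data5");

    Configuration conf = new Configuration();
    // One failing volume plus one healthy volume, comma-separated.
    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
        badVolume.getPath() + "," + goodVolume.getPath());
    // 0: any volume failure aborts startup; 1: one failure is tolerated.
    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
        tolerateOneFailure ? 1 : 0);
    return conf;
  }
}

With tolerated failures set to 1, the test writes a file and waits for it to replicate to the new node; with 0, it waits for the new DataNode's BPOfferService to stop, which is how the test observes the startup failure.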

