hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dran...@apache.org
Subject hadoop git commit: HDFS-10720. Fix intermittent test failure of TestDataNodeErasureCodingMetrics. Contributed by Rakesh R
Date Thu, 11 Aug 2016 03:20:46 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-3.0.0-alpha1 7adfde411 -> 2b282820a


HDFS-10720. Fix intermittent test failure of TestDataNodeErasureCodingMetrics. Contributed by Rakesh R


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2b282820
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2b282820
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2b282820

Branch: refs/heads/branch-3.0.0-alpha1
Commit: 2b282820afa748cf275c1a1ace98508f1cb90391
Parents: 7adfde4
Author: Kai Zheng <kai.zheng@intel.com>
Authored: Fri Aug 12 11:17:32 2016 +0800
Committer: Kai Zheng <kai.zheng@intel.com>
Committed: Fri Aug 12 11:17:32 2016 +0800

----------------------------------------------------------------------
 .../TestDataNodeErasureCodingMetrics.java       | 46 ++++++++++++++++----
 1 file changed, 37 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2b282820/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
index c7e99c2..919fb72 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
@@ -39,12 +39,15 @@ import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
 import java.io.IOException;
+import java.util.Arrays;
 
 
 /**
@@ -125,39 +128,64 @@ public class TestDataNodeErasureCodingMetrics {
     DataNode workerDn = null;
     DatanodeInfo[] locations = lastBlock.getLocations();
     assertEquals(locations.length, GROUPSIZE);
+
     // we have ONE extra datanode in addition to the GROUPSIZE datanodes, here
     // is to find the extra datanode that the reconstruction task will run on,
     // according to the current block placement logic for striped files.
     // This can be improved later to be flexible regardless wherever the task
     // runs.
-    for (DataNode dn: cluster.getDataNodes()) {
+    for (DataNode dn : cluster.getDataNodes()) {
       boolean appear = false;
-      for (DatanodeInfo info: locations) {
+      for (DatanodeInfo info : locations) {
         if (dn.getDatanodeUuid().equals(info.getDatanodeUuid())) {
           appear = true;
           break;
         }
       }
-      if(!appear) {
+      if (!appear) {
         workerDn = dn;
         break;
       }
     }
-    byte[] indices = lastBlock.getBlockIndices();
-    //corrupt the first block
-    DataNode toCorruptDn = cluster.getDataNodes().get(indices[0]);
+    // Get a datanode from the block locations.
+    LOG.info("Block locations: " + Arrays.asList(locations));
+    LOG.info("Erasure coding worker datanode: " + workerDn);
+    assertNotNull("Failed to find a worker datanode", workerDn);
+
+    DataNode toCorruptDn = cluster.getDataNode(locations[0].getIpcPort());
+    LOG.info("Datanode to be corrupted: " + toCorruptDn);
+    assertNotNull("Failed to find a datanode to be corrupted", toCorruptDn);
     toCorruptDn.shutdown();
     setDataNodeDead(toCorruptDn.getDatanodeId());
     DFSTestUtil.waitForDatanodeState(cluster, toCorruptDn.getDatanodeUuid(),
-        false, 10000 );
-    final BlockManager bm = cluster.getNamesystem().getBlockManager();
-    BlockManagerTestUtil.getComputedDatanodeWork(bm);
+        false, 10000);
+
+    int workCount = getComputedDatanodeWork();
+    assertTrue("Wrongly computed block reconstruction work", workCount > 0);
     cluster.triggerHeartbeats();
     StripedFileTestUtil.waitForReconstructionFinished(file, fs, GROUPSIZE);
 
     return workerDn;
   }
 
+  private int getComputedDatanodeWork()
+      throws IOException, InterruptedException {
+    final BlockManager bm = cluster.getNamesystem().getBlockManager();
+    // Giving a grace period to compute datanode work.
+    int workCount = 0;
+    int retries = 20;
+    while (retries > 0) {
+      workCount = BlockManagerTestUtil.getComputedDatanodeWork(bm);
+      if (workCount > 0) {
+        break;
+      }
+      retries--;
+      Thread.sleep(500);
+    }
+    LOG.info("Computed datanode work: " + workCount + ", retries: " + retries);
+    return workCount;
+  }
+
   private void setDataNodeDead(DatanodeID dnID) throws IOException {
     DatanodeDescriptor dnd =
         NameNodeAdapter.getDatanode(cluster.getNamesystem(), dnID);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message