From: rakeshr@apache.org
To: common-commits@hadoop.apache.org
Date: Wed, 02 Nov 2016 17:39:06 -0000
Message-Id: <024e2d295c47496197c4385c71862cbe@git.apache.org>
In-Reply-To: <970658c95e7f433fbb0af3524bbdc612@git.apache.org>
References: <970658c95e7f433fbb0af3524bbdc612@git.apache.org>
X-Mailer: ASF-Git Admin Mailer
Subject: [27/50] [abbrv] hadoop git commit: HDFS-11030. TestDataNodeVolumeFailure#testVolumeFailure is flaky (though passing). Contributed by Mingliang Liu

HDFS-11030. TestDataNodeVolumeFailure#testVolumeFailure is flaky (though passing).
Contributed by Mingliang Liu


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0c49f73a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0c49f73a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0c49f73a

Branch: refs/heads/HDFS-10285
Commit: 0c49f73a6c19ce0d0cd59cf8dfaa9a35f67f47ab
Parents: 90dd3a8
Author: Mingliang Liu
Authored: Thu Oct 20 13:44:25 2016 -0700
Committer: Mingliang Liu
Committed: Mon Oct 31 16:00:33 2016 -0700

----------------------------------------------------------------------
 .../datanode/TestDataNodeVolumeFailure.java     | 64 ++++++++++----------
 1 file changed, 33 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0c49f73a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 9ffe7b6..4aba4e3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -29,6 +29,7 @@ import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.Socket;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -52,7 +53,6 @@ import org.apache.hadoop.hdfs.RemotePeerFactory;
 import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -66,14 +66,16 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
-import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
-import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
-import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
-import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import com.google.common.base.Supplier;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.TrueFileFilter;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -108,6 +110,7 @@ public class TestDataNodeVolumeFailure {
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, block_size);
     // Allow a single volume failure (there are two volumes)
     conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 30);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(dn_num).build();
     cluster.waitActive();
     fs = cluster.getFileSystem();
@@ -135,7 +138,7 @@ public class TestDataNodeVolumeFailure {
    * and that we can replicate to both datanodes even after a single volume
    * failure if the configuration parameter allows this.
    */
-  @Test
+  @Test(timeout = 120000)
   public void testVolumeFailure() throws Exception {
     System.out.println("Data dir: is " + dataDir.getPath());

@@ -155,7 +158,7 @@
     // fail the volume
     // delete/make non-writable one of the directories (failed volume)
     data_fail = new File(dataDir, "data3");
-    failedDir = MiniDFSCluster.getFinalizedDir(dataDir,
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
         cluster.getNamesystem().getBlockPoolId());
     if (failedDir.exists() &&
         //!FileUtil.fullyDelete(failedDir)
@@ -171,29 +174,26 @@
     // we need to make sure that the "failed" volume is being accessed -
     // and that will cause failure, blocks removal, "emergency" block report
     triggerFailure(filename, filesize);
-
-    // make sure a block report is sent
-    DataNode dn = cluster.getDataNodes().get(1); //corresponds to dir data3
-    String bpid = cluster.getNamesystem().getBlockPoolId();
-    DatanodeRegistration dnR = dn.getDNRegistrationForBP(bpid);
-
-    Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
-        dn.getFSDataset().getBlockReports(bpid);
-
-    // Send block report
-    StorageBlockReport[] reports =
-        new StorageBlockReport[perVolumeBlockLists.size()];
-
-    int reportIndex = 0;
-    for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
-      DatanodeStorage dnStorage = kvPair.getKey();
-      BlockListAsLongs blockList = kvPair.getValue();
-      reports[reportIndex++] =
-          new StorageBlockReport(dnStorage, blockList);
-    }
-
-    cluster.getNameNodeRpc().blockReport(dnR, bpid, reports,
-        new BlockReportContext(1, 0, System.nanoTime(), 0, true));
+    // DN eventually have latest volume failure information for next heartbeat
+    final DataNode dn = cluster.getDataNodes().get(1);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        final VolumeFailureSummary summary =
+            dn.getFSDataset().getVolumeFailureSummary();
+        return summary != null &&
+            summary.getFailedStorageLocations() != null &&
+            summary.getFailedStorageLocations().length == 1;
+      }
+    }, 10, 30 * 1000);
+
+    // trigger DN to send heartbeat
+    DataNodeTestUtils.triggerHeartbeat(dn);
+    final BlockManager bm = cluster.getNamesystem().getBlockManager();
+    // trigger NN handel heartbeat
+    BlockManagerTestUtil.checkHeartbeat(bm);
+    // NN now should have latest volume failure
+    assertEquals(1, cluster.getNamesystem().getVolumeFailuresTotal());

     // verify number of blocks and files...
     verify(filename, filesize);
@@ -492,9 +492,11 @@
    * @throws IOException
    */
   private boolean deteteBlocks(File dir) {
-    File [] fileList = dir.listFiles();
+    Collection<File> fileList = FileUtils.listFiles(dir,
+        TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
     for(File f : fileList) {
       if(f.getName().startsWith(Block.BLOCK_FILE_PREFIX)) {
+        System.out.println("Deleting file " + f);
         if(!f.delete())
           return false;


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
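
The patch above replaces the manually assembled full block report with a poll-then-heartbeat sequence: wait until the DataNode's dataset reports the failed volume, force a heartbeat, let the NameNode process it, and only then assert on the NameNode-side failure count. The standalone sketch below illustrates that sequence outside the test. It is not part of the commit; the class and method names (VolumeFailureWaitSketch, waitForVolumeFailureAndSync) are made up for illustration, and it assumes a running MiniDFSCluster plus a DataNode handle such as the test's cluster.getDataNodes().get(1). The utility calls themselves (GenericTestUtils.waitFor, DataNodeTestUtils.triggerHeartbeat, BlockManagerTestUtil.checkHeartbeat) are the same ones the diff uses.

// Standalone sketch of the poll-then-heartbeat pattern from the diff.
// Class and method names are hypothetical.
import java.io.IOException;
import java.util.concurrent.TimeoutException;

import com.google.common.base.Supplier;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.test.GenericTestUtils;

public final class VolumeFailureWaitSketch {

  private VolumeFailureWaitSketch() {
  }

  /**
   * Wait until the DataNode itself has noticed exactly one failed volume,
   * then push that state to the NameNode with an explicit heartbeat, so a
   * later assertion on the NameNode's failure count does not depend on timing.
   */
  public static void waitForVolumeFailureAndSync(final MiniDFSCluster cluster,
      final DataNode dn)
      throws IOException, TimeoutException, InterruptedException {
    // Poll every 10 ms, for at most 30 s, until the DataNode's volume
    // failure summary lists a single failed storage location.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        final VolumeFailureSummary summary =
            dn.getFSDataset().getVolumeFailureSummary();
        return summary != null
            && summary.getFailedStorageLocations() != null
            && summary.getFailedStorageLocations().length == 1;
      }
    }, 10, 30 * 1000);

    // Make the DataNode send a heartbeat now instead of waiting for the
    // configured heartbeat interval (the test sets it to 30 s).
    DataNodeTestUtils.triggerHeartbeat(dn);

    // Have the NameNode process outstanding heartbeat state so its
    // volume-failure counters are up to date before any assertion.
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    BlockManagerTestUtil.checkHeartbeat(bm);
  }
}

In the committed test this sequence is followed by assertEquals(1, cluster.getNamesystem().getVolumeFailuresTotal()), so the NameNode-side failure count is checked only after the explicit heartbeat has been handled, rather than racing against the DataNode's own timers.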