From: arp@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 19 May 2016 19:46:51 -0000
Subject: [02/50] [abbrv] hadoop git commit: HDFS-8449. Add tasks count metrics to datanode for ECWorker. Contributed by Bo Li.

HDFS-8449. Add tasks count metrics to datanode for ECWorker. Contributed by Bo Li.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ad944112
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ad944112
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ad944112

Branch: refs/heads/HDFS-7240
Commit: ad9441122f31547fcab29f50e64d52a8895906b6
Parents: 4578820
Author: Kai Zheng
Authored: Sun May 15 23:39:09 2016 -0700
Committer: Kai Zheng
Committed: Sun May 15 23:39:09 2016 -0700

----------------------------------------------------------------------
 .../erasurecode/StripedReconstructor.java       |   4 +-
 .../datanode/metrics/DataNodeMetrics.java       |  14 ++
 .../apache/hadoop/hdfs/StripedFileTestUtil.java |  32 +++-
 .../hadoop/hdfs/TestReconstructStripedFile.java |  27 +---
 .../TestDataNodeErasureCodingMetrics.java       | 153 +++++++++++++++++++
 5 files changed, 204 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad944112/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java
index 1b59b22..c80bf96 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java
@@ -179,11 +179,11 @@ class StripedReconstructor implements Runnable {
       // block replication.
     } catch (Throwable e) {
       LOG.warn("Failed to reconstruct striped block: {}", blockGroup, e);
+      datanode.getMetrics().incrECFailedReconstructionTasks();
     } finally {
       datanode.decrementXmitsInProgress();
-
+      datanode.getMetrics().incrECReconstructionTasks();
       stripedReader.close();
-
       stripedWriter.close();
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad944112/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index 085762b..3d504d6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -126,6 +126,11 @@ public class DataNodeMetrics {
   @Metric MutableRate sendDataPacketTransferNanos;
   final MutableQuantiles[] sendDataPacketTransferNanosQuantiles;
 
+  @Metric("Count of erasure coding reconstruction tasks")
+  MutableCounterLong ecReconstructionTasks;
+  @Metric("Count of erasure coding failed reconstruction tasks")
+  MutableCounterLong ecFailedReconstructionTasks;
+
   final MetricsRegistry registry = new MetricsRegistry("datanode");
   final String name;
   JvmMetrics jvmMetrics = null;
@@ -415,4 +420,13 @@ public class DataNodeMetrics {
       q.add(latencyMs);
     }
   }
+
+  public void incrECReconstructionTasks() {
+    ecReconstructionTasks.incr();
+  }
+
+  public void incrECFailedReconstructionTasks() {
+    ecFailedReconstructionTasks.incr();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad944112/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
index 3c58133..6dcccc3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
@@ -74,7 +74,7 @@ public class StripedFileTestUtil {
   static int numDNs = NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS + 2;
   static int BLOCK_GROUP_SIZE = blockSize * NUM_DATA_BLOCKS;
 
-  static byte[] generateBytes(int cnt) {
+  public static byte[] generateBytes(int cnt) {
     byte[] bytes = new byte[cnt];
     for (int i = 0; i < cnt; i++) {
       bytes[i] = getByte(i);
@@ -502,4 +502,34 @@ public class StripedFileTestUtil {
       }
     }
   }
+
+  /**
+   * Wait for the reconstruction to be finished when the file has
+   * corrupted blocks.
+   */
+  public static LocatedBlocks waitForReconstructionFinished(Path file,
+      DistributedFileSystem fs, int groupSize)
+      throws Exception {
+    final int attempts = 60;
+    for (int i = 0; i < attempts; i++) {
+      LocatedBlocks locatedBlocks = getLocatedBlocks(file, fs);
+      LocatedStripedBlock lastBlock =
+          (LocatedStripedBlock)locatedBlocks.getLastLocatedBlock();
+      DatanodeInfo[] storageInfos = lastBlock.getLocations();
+      if (storageInfos.length >= groupSize) {
+        return locatedBlocks;
+      }
+      Thread.sleep(1000);
+    }
+    throw new IOException("Time out waiting for EC block reconstruction.");
+  }
+
+  /**
+   * Get the located blocks of a file.
+   */
+  public static LocatedBlocks getLocatedBlocks(Path file,
+      DistributedFileSystem fs)
+      throws IOException {
+    return fs.getClient().getLocatedBlocks(file.toString(), 0, Long.MAX_VALUE);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad944112/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java
index 7155e74..36d2dbd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java
@@ -269,7 +269,8 @@ public class TestReconstructStripedFile {
     DFSTestUtil.writeFile(fs, file, data);
     StripedFileTestUtil.waitBlockGroupsReported(fs, fileName);
 
-    LocatedBlocks locatedBlocks = getLocatedBlocks(file);
+    LocatedBlocks locatedBlocks =
+        StripedFileTestUtil.getLocatedBlocks(file, fs);
     assertEquals(locatedBlocks.getFileLength(), fileLen);
 
     LocatedStripedBlock lastBlock =
@@ -325,7 +326,7 @@ public class TestReconstructStripedFile {
     int stoppedDN = generateErrors(errorMap, type);
 
     // Check the locatedBlocks of the file again
-    locatedBlocks = getLocatedBlocks(file);
+    locatedBlocks = StripedFileTestUtil.getLocatedBlocks(file, fs);
     lastBlock = (LocatedStripedBlock)locatedBlocks.getLastLocatedBlock();
     storageInfos = lastBlock.getLocations();
     assertEquals(storageInfos.length, groupSize - stoppedDN);
@@ -338,7 +339,7 @@ public class TestReconstructStripedFile {
       }
     }
 
-    waitForReconstructionFinished(file, groupSize);
+    StripedFileTestUtil.waitForReconstructionFinished(file, fs, groupSize);
 
     targetDNs = sortTargetsByReplicas(blocks, targetDNs);
 
@@ -381,26 +382,6 @@ public class TestReconstructStripedFile {
     return result;
   }
 
-  private LocatedBlocks waitForReconstructionFinished(Path file, int groupSize)
-      throws Exception {
-    final int ATTEMPTS = 60;
-    for (int i = 0; i < ATTEMPTS; i++) {
-      LocatedBlocks locatedBlocks = getLocatedBlocks(file);
-      LocatedStripedBlock lastBlock =
-          (LocatedStripedBlock)locatedBlocks.getLastLocatedBlock();
-      DatanodeInfo[] storageInfos = lastBlock.getLocations();
-      if (storageInfos.length >= groupSize) {
-        return locatedBlocks;
-      }
-      Thread.sleep(1000);
-    }
-    throw new IOException ("Time out waiting for EC block reconstruction.");
-  }
-
-  private LocatedBlocks getLocatedBlocks(Path file) throws IOException {
-    return fs.getClient().getLocatedBlocks(file.toString(), 0, Long.MAX_VALUE);
-  }
-
   /*
    * Tests that processErasureCodingTasks should not throw exceptions out due to
    * invalid ECTask submission.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad944112/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
new file mode 100644
index 0000000..e401fed
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.StripedFileTestUtil;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.assertEquals;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+
+/**
+ * This file tests the erasure coding metrics in DataNode.
+ */
+public class TestDataNodeErasureCodingMetrics {
+  public static final Log LOG = LogFactory.
+      getLog(TestDataNodeErasureCodingMetrics.class);
+
+  private static final int DATA_BLK_NUM = StripedFileTestUtil.NUM_DATA_BLOCKS;
+  private static final int PARITY_BLK_NUM =
+      StripedFileTestUtil.NUM_PARITY_BLOCKS;
+  private static final int CELLSIZE =
+      StripedFileTestUtil.BLOCK_STRIPED_CELL_SIZE;
+  private static final int BLOCKSIZE = CELLSIZE;
+  private static final int GROUPSIZE = DATA_BLK_NUM + PARITY_BLK_NUM;
+  private static final int DN_NUM = GROUPSIZE + 1;
+
+  private MiniDFSCluster cluster;
+  private Configuration conf;
+  private DistributedFileSystem fs;
+
+  @Before
+  public void setup() throws IOException {
+    conf = new Configuration();
+
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DN_NUM).build();
+    cluster.waitActive();
+    cluster.getFileSystem().getClient().setErasureCodingPolicy("/", null);
+    fs = cluster.getFileSystem();
+  }
+
+  @After
+  public void tearDown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test(timeout = 120000)
+  public void testEcTasks() throws Exception {
+    DataNode workerDn = doTest("/testEcTasks");
+    MetricsRecordBuilder rb = getMetrics(workerDn.getMetrics().name());
+    assertCounter("EcReconstructionTasks", (long) 1, rb);
+    assertCounter("EcFailedReconstructionTasks", (long) 0, rb);
+  }
+
+  private DataNode doTest(String fileName) throws Exception {
+
+    Path file = new Path(fileName);
+    long fileLen = DATA_BLK_NUM * BLOCKSIZE;
+    final byte[] data = StripedFileTestUtil.generateBytes((int) fileLen);
+    DFSTestUtil.writeFile(fs, file, data);
+    StripedFileTestUtil.waitBlockGroupsReported(fs, fileName);
+
+    LocatedBlocks locatedBlocks =
+        StripedFileTestUtil.getLocatedBlocks(file, fs);
+    // only one block group
+    LocatedStripedBlock lastBlock =
+        (LocatedStripedBlock)locatedBlocks.getLastLocatedBlock();
+    DataNode workerDn = null;
+    DatanodeInfo[] locations = lastBlock.getLocations();
+    assertEquals(locations.length, GROUPSIZE);
+
+    // We have ONE extra datanode in addition to the GROUPSIZE datanodes. The
+    // loop below finds that extra datanode, which is where the reconstruction
+    // task will run according to the current block placement logic for
+    // striped files. This can be improved later to be flexible regardless of
+    // where the task runs.
+    for (DataNode dn : cluster.getDataNodes()) {
+      boolean appear = false;
+      for (DatanodeInfo info : locations) {
+        if (dn.getDatanodeUuid().equals(info.getDatanodeUuid())) {
+          appear = true;
+          break;
+        }
+      }
+      if (!appear) {
+        workerDn = dn;
+        break;
+      }
+    }
+
+    byte[] indices = lastBlock.getBlockIndices();
+    // corrupt the first block
+    DataNode toCorruptDn = cluster.getDataNodes().get(indices[0]);
+    toCorruptDn.shutdown();
+    setDataNodeDead(toCorruptDn.getDatanodeId());
+    DFSTestUtil.waitForDatanodeState(cluster, toCorruptDn.getDatanodeUuid(),
+        false, 10000);
+
+    final BlockManager bm = cluster.getNamesystem().getBlockManager();
+    BlockManagerTestUtil.getComputedDatanodeWork(bm);
+    cluster.triggerHeartbeats();
+    StripedFileTestUtil.waitForReconstructionFinished(file, fs, GROUPSIZE);
+
+    return workerDn;
+  }
+
+  private void setDataNodeDead(DatanodeID dnID) throws IOException {
+    DatanodeDescriptor dnd =
+        NameNodeAdapter.getDatanode(cluster.getNamesystem(), dnID);
+    DFSTestUtil.setDatanodeDead(dnd);
+    BlockManagerTestUtil.checkHeartbeat(
+        cluster.getNamesystem().getBlockManager());
+  }
+
+}
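
Note on reading the new counters (not part of the commit): besides the in-process MetricsAsserts checks in the test above, the two counters are published through the DataNode metrics system, so after a reconstruction task runs they also appear on the DataNode's /jmx HTTP servlet under its DataNodeActivity record. The probe below is a minimal sketch under assumptions: the class name EcMetricsProbe is hypothetical, and the endpoint URL (50075 was the default DataNode HTTP port in the 2.x line) depends on your cluster configuration.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    // Sketch: dump a DataNode's /jmx servlet and print the lines that mention
    // the two counters added by this commit. The URL is an assumption; pass
    // the real DataNode HTTP address as the first argument.
    public class EcMetricsProbe {
      public static void main(String[] args) throws Exception {
        String url = args.length > 0 ? args[0] : "http://localhost:50075/jmx";
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
            new URL(url).openStream(), StandardCharsets.UTF_8))) {
          String line;
          while ((line = in.readLine()) != null) {
            if (line.contains("EcReconstructionTasks")
                || line.contains("EcFailedReconstructionTasks")) {
              System.out.println(line.trim());
            }
          }
        }
      }
    }

Because the counters are plain MutableCounterLong fields registered via @Metric, no extra wiring is needed for them to show up in any metrics sink (JMX, file, Ganglia) the DataNode is already configured with.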