From: aengineer@apache.org
To: common-commits@hadoop.apache.org
Date: Mon, 18 Sep 2017 22:12:15 -0000
Message-Id: <1df2f9751c674429a410a1edefbae716@git.apache.org>
In-Reply-To: <4fca8be1525f4e5482cdcccee6204656@git.apache.org>
References: <4fca8be1525f4e5482cdcccee6204656@git.apache.org>
Subject: [32/50] [abbrv] hadoop git commit: HDFS-12349. Improve log message when it could not alloc enough blocks for EC. (Lei (Eddy) Xu)

HDFS-12349. Improve log message when it could not alloc enough blocks for EC.
(Lei (Eddy) Xu)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/fbe06b58
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/fbe06b58
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/fbe06b58

Branch: refs/heads/HDFS-7240
Commit: fbe06b58805aac4861fb27dfa273914b69e8bdc6
Parents: 3a8d57a
Author: Lei Xu
Authored: Fri Sep 15 12:12:42 2017 -0700
Committer: Lei Xu
Committed: Fri Sep 15 12:12:42 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hdfs/DFSStripedOutputStream.java     | 17 ++++++------
 .../server/blockmanagement/BlockManager.java    | 26 ++++++++++++------
 .../hdfs/server/namenode/FSDirWriteFileOp.java  | 24 ++++++++++------
 .../TestDFSStripedOutputStreamWithFailure.java  | 29 ++++++++++----------
 .../datatransfer/sasl/TestSaslDataTransfer.java |  4 +--
 .../blockmanagement/TestBlockManager.java       |  3 +-
 .../blockmanagement/TestBlockStatsMXBean.java   |  2 +-
 .../hdfs/server/namenode/TestDeadDatanode.java  |  4 +--
 8 files changed, 63 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
index 7f05338..44db3a6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
@@ -260,6 +260,7 @@ public class DFSStripedOutputStream extends DFSOutputStream
   private final Coordinator coordinator;
   private final CellBuffers cellBuffers;
+  private final ErasureCodingPolicy ecPolicy;
   private final RawErasureEncoder encoder;
   private final List streamers;
   private final DFSPacket[] currentPackets; // current Packet of each streamer
@@ -286,7 +287,7 @@ public class DFSStripedOutputStream extends DFSOutputStream
       LOG.debug("Creating DFSStripedOutputStream for " + src);
     }
 
-    final ErasureCodingPolicy ecPolicy = stat.getErasureCodingPolicy();
+    ecPolicy = stat.getErasureCodingPolicy();
     final int numParityBlocks = ecPolicy.getNumParityUnits();
     cellSize = ecPolicy.getCellSize();
     numDataBlocks = ecPolicy.getNumDataUnits();
@@ -478,11 +479,6 @@ public class DFSStripedOutputStream extends DFSOutputStream
     final LocatedBlock lb = addBlock(excludedNodes, dfsClient, src,
         currentBlockGroup, fileId, favoredNodes, getAddBlockFlags());
     assert lb.isStriped();
-    if (lb.getLocations().length < numDataBlocks) {
-      throw new IOException("Failed to get " + numDataBlocks
-          + " nodes from namenode: blockGroupSize= " + numAllBlocks
-          + ", blocks.length= " + lb.getLocations().length);
-    }
     // assign the new block to the current block group
     currentBlockGroup = lb.getBlock();
     blockGroupIndex++;
@@ -494,11 +490,16 @@ public class DFSStripedOutputStream extends DFSOutputStream
       StripedDataStreamer si = getStripedDataStreamer(i);
       assert si.isHealthy();
       if (blocks[i] == null) {
+        // allocBlock() should guarantee that all data blocks are successfully
+        // allocated.
+        assert i >= numDataBlocks;
         // Set exception and close streamer as there is no block locations
         // found for the parity block.
-        LOG.warn("Failed to get block location for parity block, index=" + i);
+        LOG.warn("Cannot allocate parity block(index={}, policy={}). " +
+            "Not enough datanodes? Exclude nodes={}", i, ecPolicy.getName(),
+            excludedNodes);
         si.getLastException().set(
-            new IOException("Failed to get following block, i=" + i));
+            new IOException("Failed to get parity block, index=" + i));
         si.getErrorState().setInternalError();
         si.close(true);
       } else {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index f4e5cb4..f33ec63 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -2057,6 +2057,7 @@ public class BlockManager implements BlockStatsMXBean {
       final List favoredNodes,
       final byte storagePolicyID,
       final BlockType blockType,
+      final ErasureCodingPolicy ecPolicy,
       final EnumSet flags) throws IOException {
     List favoredDatanodeDescriptors =
         getDatanodeDescriptors(favoredNodes);
@@ -2067,14 +2068,23 @@ public class BlockManager implements BlockStatsMXBean {
     final DatanodeStorageInfo[] targets = blockplacement.chooseTarget(src,
         numOfReplicas, client, excludedNodes, blocksize,
         favoredDatanodeDescriptors, storagePolicy, flags);
-    if (targets.length < minReplication) {
-      throw new IOException("File " + src + " could only be replicated to "
-          + targets.length + " nodes instead of minReplication (="
-          + minReplication + "). There are "
-          + getDatanodeManager().getNetworkTopology().getNumOfLeaves()
-          + " datanode(s) running and "
-          + (excludedNodes == null? "no": excludedNodes.size())
-          + " node(s) are excluded in this operation.");
+
+    final String errorMessage = "File %s could only be written to %d of " +
+        "the %d %s. There are %d datanode(s) running and %s " +
+        "node(s) are excluded in this operation.";
+    if (blockType == BlockType.CONTIGUOUS && targets.length < minReplication) {
+      throw new IOException(String.format(errorMessage, src,
+          targets.length, minReplication, "minReplication nodes",
+          getDatanodeManager().getNetworkTopology().getNumOfLeaves(),
+          (excludedNodes == null? "no": excludedNodes.size())));
+    } else if (blockType == BlockType.STRIPED &&
+        targets.length < ecPolicy.getNumDataUnits()) {
+      throw new IOException(
+          String.format(errorMessage, src, targets.length,
+              ecPolicy.getNumDataUnits(),
+              String.format("required nodes for %s", ecPolicy.getName()),
+              getDatanodeManager().getNetworkTopology().getNumOfLeaves(),
+              (excludedNodes == null ? "no" : excludedNodes.size())));
     }
     return targets;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java
index 012e916..b202212 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java
@@ -201,7 +201,7 @@ class FSDirWriteFileOp {
     }
     storagePolicyID = pendingFile.getStoragePolicyID();
     return new ValidateAddBlockResult(blockSize, numTargets, storagePolicyID,
-        clientMachine, blockType);
+        clientMachine, blockType, ecPolicy);
   }
 
   static LocatedBlock makeLocatedBlock(FSNamesystem fsn, BlockInfo blk,
@@ -286,7 +286,7 @@ class FSDirWriteFileOp {
     return bm.chooseTarget4NewBlock(src, r.numTargets, clientNode,
                                     excludedNodesSet, r.blockSize,
                                     favoredNodesList, r.storagePolicyID,
-                                    r.blockType, flags);
+                                    r.blockType, r.ecPolicy, flags);
   }
 
   /**
@@ -831,20 +831,28 @@ class FSDirWriteFileOp {
   }
 
   static class ValidateAddBlockResult {
-    final long blockSize;
-    final int numTargets;
-    final byte storagePolicyID;
-    final String clientMachine;
-    final BlockType blockType;
+    private final long blockSize;
+    private final int numTargets;
+    private final byte storagePolicyID;
+    private final String clientMachine;
+    private final BlockType blockType;
+    private final ErasureCodingPolicy ecPolicy;
 
     ValidateAddBlockResult(
         long blockSize, int numTargets, byte storagePolicyID,
-        String clientMachine, BlockType blockType) {
+        String clientMachine, BlockType blockType,
+        ErasureCodingPolicy ecPolicy) {
       this.blockSize = blockSize;
       this.numTargets = numTargets;
       this.storagePolicyID = storagePolicyID;
       this.clientMachine = clientMachine;
       this.blockType = blockType;
+      this.ecPolicy = ecPolicy;
+
+      if (blockType == BlockType.STRIPED) {
+        Preconditions.checkArgument(ecPolicy != null,
+            "ecPolicy is not specified for striped block");
+      }
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java
index 231f260..ea889e3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.io.erasurecode.ErasureCodeNative;
 import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.junit.Assert;
@@ -284,7 +285,7 @@ public class TestDFSStripedOutputStreamWithFailure {
 
   @Test(timeout = 90000)
   public void testAddBlockWhenNoSufficientDataBlockNumOfNodes()
-      throws IOException {
+      throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
     try {
@@ -303,20 +304,18 @@ public class TestDFSStripedOutputStreamWithFailure {
           DatanodeReportType.LIVE);
       assertEquals("Mismatches number of live Dns ", numDatanodes, info.length);
       final Path dirFile = new Path(dir, "ecfile");
-      FSDataOutputStream out;
-      try {
-        out = dfs.create(dirFile, true);
-        out.write("something".getBytes());
-        out.flush();
-        out.close();
-        Assert.fail("Failed to validate available dns against blkGroupSize");
-      } catch (IOException ioe) {
-        // expected
-        GenericTestUtils.assertExceptionContains("Failed to get " +
-            dataBlocks + " nodes from namenode: blockGroupSize= " +
-            (dataBlocks + parityBlocks) + ", blocks.length= " +
-            numDatanodes, ioe);
-      }
+      LambdaTestUtils.intercept(
+          IOException.class,
+          "File " + dirFile + " could only be written to " +
+              numDatanodes + " of the " + dataBlocks + " required nodes for " +
+              getEcPolicy().getName(),
+          () -> {
+            try (FSDataOutputStream out = dfs.create(dirFile, true)) {
+              out.write("something".getBytes());
+              out.flush();
+            }
+            return 0;
+          });
     } finally {
       tearDown();
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java
index 8555e5d..2fe0a1c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java
@@ -118,7 +118,7 @@ public class TestSaslDataTransfer extends SaslDataTransferTestCase {
     HdfsConfiguration clientConf = new HdfsConfiguration(clusterConf);
     clientConf.set(DFS_DATA_TRANSFER_PROTECTION_KEY, "authentication");
     exception.expect(IOException.class);
-    exception.expectMessage("could only be replicated to 0 nodes");
+    exception.expectMessage("could only be written to 0");
     doTest(clientConf);
   }
 
@@ -140,7 +140,7 @@ public class TestSaslDataTransfer extends SaslDataTransferTestCase {
           "configured or not supported in client");
     } catch (IOException e) {
       GenericTestUtils.assertMatches(e.getMessage(),
-          "could only be replicated to 0 nodes");
+          "could only be written to 0");
     } finally {
       logs.stopCapturing();
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index 4c1ea7b..10289ed 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -1030,8 +1030,7 @@ public class TestBlockManager {
           0x1BAD5EED);
     } catch (RemoteException re) {
-      GenericTestUtils.assertExceptionContains("nodes instead of "
-          + "minReplication", re);
+      GenericTestUtils.assertExceptionContains("of the 1 minReplication", re);
     }
 
   } finally {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java
index b7583c4..bcf38d6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java
@@ -182,7 +182,7 @@ public class TestBlockStatsMXBean {
       fail("Should throw exception, becuase no DISK storage available");
     } catch (Exception e) {
       assertTrue(e.getMessage().contains(
-          "could only be replicated to 0 nodes instead"));
+          "could only be written to 0 of the 1 minReplication"));
     }
     // wait for heartbeat
     Thread.sleep(6000);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbe06b58/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
index 74be90c..b6c1318 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
@@ -175,8 +175,8 @@ public class TestDeadDatanode {
     // choose the targets, but local node should not get selected as this is not
     // part of the cluster anymore
     DatanodeStorageInfo[] results = bm.chooseTarget4NewBlock("/hello", 3,
-        clientNode, new HashSet(), 256 * 1024 * 1024L, null, (byte) 7,
-        BlockType.CONTIGUOUS, null);
+        clientNode, new HashSet<>(), 256 * 1024 * 1024L, null, (byte) 7,
+        BlockType.CONTIGUOUS, null, null);
     for (DatanodeStorageInfo datanodeStorageInfo : results) {
       assertFalse("Dead node should not be choosen", datanodeStorageInfo
           .getDatanodeDescriptor().equals(clientNode));
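
----------------------------------------------------------------------

For context on the behavior this patch changes: the new allocation-failure text is built from a single format string in BlockManager#chooseTarget4NewBlock and parameterized differently for contiguous and striped (EC) blocks. The standalone sketch below is not part of the commit; it only prints what that template yields in the two cases. The class name, file paths, node counts, and the policy name "RS-6-3-1024k" are invented example values.

// Standalone sketch (not part of the commit): prints the messages produced by
// the new errorMessage template in BlockManager#chooseTarget4NewBlock.
// All concrete values below (paths, counts, policy name) are invented examples.
public class AllocationErrorMessageSketch {
  private static final String ERROR_MESSAGE =
      "File %s could only be written to %d of the %d %s. There are %d "
          + "datanode(s) running and %s node(s) are excluded in this operation.";

  public static void main(String[] args) {
    // Contiguous block: fewer targets (0) than minReplication (1).
    System.out.println(String.format(ERROR_MESSAGE,
        "/user/example/file", 0, 1, "minReplication nodes", 3, "no"));

    // Striped (EC) block: fewer targets (5) than the policy's data units (6).
    System.out.println(String.format(ERROR_MESSAGE,
        "/user/example/ecfile", 5, 6,
        String.format("required nodes for %s", "RS-6-3-1024k"), 5, "no"));
  }
}

With five live datanodes and a 6-data-unit policy, for example, the striped branch reports that the file "could only be written to 5 of the 6 required nodes for" the policy, which is the phrasing the updated tests above assert.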