From: aengineer@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 14 Apr 2016 06:18:34 -0000
Subject: [16/21] hadoop git commit: HDFS-10279. Improve validation of the configured number of tolerated failed volumes. Contributed by Lin Yiqun.

HDFS-10279. Improve validation of the configured number of tolerated failed volumes. Contributed by Lin Yiqun.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/314aa21a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/314aa21a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/314aa21a

Branch: refs/heads/HDFS-1312
Commit: 314aa21a89134fac68ac3cb95efdeb56bd3d7b05
Parents: 192112d
Author: Andrew Wang
Authored: Wed Apr 13 16:39:50 2016 -0700
Committer: Andrew Wang
Committed: Wed Apr 13 16:39:50 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hdfs/server/datanode/DNConf.java     | 18 ++++++++++++++++++
 .../hadoop/hdfs/server/datanode/DataNode.java   |  9 +++++++++
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 13 ++-----------
 .../TestDataNodeVolumeFailureToleration.java    | 19 +++++++++++++++++--
 4 files changed, 46 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/314aa21a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
index 5cff2d3..b616414 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
@@ -114,6 +114,9 @@ public class DNConf {
   // Allow LAZY_PERSIST writes from non-local clients?
   private final boolean allowNonLocalLazyPersist;
 
+  private final int volFailuresTolerated;
+  private final int volsConfigured;
+
   public DNConf(Configuration conf) {
     this.conf = conf;
     socketTimeout = conf.getInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY,
@@ -238,6 +241,13 @@ public class DNConf {
     this.bpReadyTimeout = conf.getLong(
         DFS_DATANODE_BP_READY_TIMEOUT_KEY,
         DFS_DATANODE_BP_READY_TIMEOUT_DEFAULT);
+
+    this.volFailuresTolerated =
+        conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
+            DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
+    String[] dataDirs =
+        conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
+    this.volsConfigured = (dataDirs == null) ? 0 : dataDirs.length;
   }
 
   // We get minimumNameNodeVersion via a method so it can be mocked out in tests.
@@ -363,4 +373,12 @@ public class DNConf {
   public long getLifelineIntervalMs() {
     return lifelineIntervalMs;
   }
+
+  public int getVolFailuresTolerated() {
+    return volFailuresTolerated;
+  }
+
+  public int getVolsConfigured() {
+    return volsConfigured;
+  }
 }
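For context, here is a minimal standalone sketch (not part of the patch) of how the two values introduced above are derived from the configuration. The class name DNConfSketch and the example paths are hypothetical; DNConf, its new getters, and the DFSConfigKeys constants are used exactly as in the hunk above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.server.datanode.DNConf;

    public class DNConfSketch {
      public static void main(String[] args) {
        Configuration conf = new HdfsConfiguration();
        // Three comma-separated data directories => volsConfigured == 3.
        conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
            "/data/1/dn,/data/2/dn,/data/3/dn");
        // Tolerate one failed volume.
        conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);

        DNConf dnConf = new DNConf(conf);
        System.out.println(dnConf.getVolsConfigured());       // prints 3
        System.out.println(dnConf.getVolFailuresTolerated()); // prints 1
      }
    }

Caching both values in DNConf gives the DataNode and FsDatasetImpl a single consistent source for the validation that follows.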
http://git-wip-us.apache.org/repos/asf/hadoop/blob/314aa21a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index 625eb3f..288fc3e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -1268,6 +1268,15 @@ public class DataNode extends ReconfigurableBase
     LOG.info("Starting DataNode with maxLockedMemory = " +
         dnConf.maxLockedMemory);
 
+    int volFailuresTolerated = dnConf.getVolFailuresTolerated();
+    int volsConfigured = dnConf.getVolsConfigured();
+    if (volFailuresTolerated < 0 || volFailuresTolerated >= volsConfigured) {
+      throw new DiskErrorException("Invalid value configured for "
+          + "dfs.datanode.failed.volumes.tolerated - " + volFailuresTolerated
+          + ". Value configured is either less than 0 or >= "
+          + "to the number of configured volumes (" + volsConfigured + ").");
+    }
+
     storage = new DataStorage();
 
     // global DN settings
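The check above makes the accepted range explicit: 0 <= volFailuresTolerated <= volsConfigured - 1, i.e. at least one configured volume must remain usable. A small hypothetical helper (not part of the patch) that mirrors the logic, using Hadoop's real DiskChecker.DiskErrorException:

    import org.apache.hadoop.util.DiskChecker.DiskErrorException;

    public class ToleratedRangeSketch {
      // Mirrors the startup check: reject negative values and values that
      // would allow every configured volume to fail.
      static void validate(int tolerated, int volsConfigured)
          throws DiskErrorException {
        if (tolerated < 0 || tolerated >= volsConfigured) {
          throw new DiskErrorException("Invalid value configured for "
              + "dfs.datanode.failed.volumes.tolerated - " + tolerated);
        }
      }

      public static void main(String[] args) throws DiskErrorException {
        validate(2, 3);  // ok: up to 2 of 3 volumes may fail
        validate(-1, 3); // throws DiskErrorException
      }
    }

Performing this check at DataNode startup means an invalid value fails fast, before FsDatasetImpl is ever constructed.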
http://git-wip-us.apache.org/repos/asf/hadoop/blob/314aa21a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 381c799..f7e0aae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -268,24 +268,15 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     this.smallBufferSize = DFSUtilClient.getSmallBufferSize(conf);
     // The number of volumes required for operation is the total number
     // of volumes minus the number of failed volumes we can tolerate.
-    volFailuresTolerated =
-        conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
-            DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
+    volFailuresTolerated = datanode.getDnConf().getVolFailuresTolerated();
 
-    String[] dataDirs = conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
     Collection<StorageLocation> dataLocations = DataNode.getStorageLocations(conf);
     List<VolumeFailureInfo> volumeFailureInfos = getInitialVolumeFailureInfos(
         dataLocations, storage);
 
-    int volsConfigured = (dataDirs == null) ? 0 : dataDirs.length;
+    int volsConfigured = datanode.getDnConf().getVolsConfigured();
     int volsFailed = volumeFailureInfos.size();
 
-    if (volFailuresTolerated < 0 || volFailuresTolerated >= volsConfigured) {
-      throw new DiskErrorException("Invalid value configured for "
-          + "dfs.datanode.failed.volumes.tolerated - " + volFailuresTolerated
-          + ". Value configured is either less than 0 or >= "
-          + "to the number of configured volumes (" + volsConfigured + ").");
-    }
     if (volsFailed > volFailuresTolerated) {
       throw new DiskErrorException("Too many failed volumes - "
           + "current valid volumes: " + storage.getNumStorageDirs()

http://git-wip-us.apache.org/repos/asf/hadoop/blob/314aa21a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
index 1eb8bca..2f8239e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
@@ -34,6 +34,8 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -229,9 +231,22 @@ public class TestDataNodeVolumeFailureToleration {
         prepareDirToFail(dirs[i]);
       }
       restartDatanodes(volumesTolerated, manageDfsDirs);
-      assertEquals(expectedBPServiceState, cluster.getDataNodes().get(0)
-          .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId()));
+    } catch (DiskErrorException e) {
+      GenericTestUtils.assertExceptionContains("Invalid value configured for "
+          + "dfs.datanode.failed.volumes.tolerated", e);
     } finally {
+      boolean bpServiceState;
+      // The DataNode fails to register if an invalid value was configured
+      // for the number of tolerated volume failures.
+      if (cluster.getDataNodes().size() == 0) {
+        bpServiceState = false;
+      } else {
+        bpServiceState =
+            cluster.getDataNodes().get(0)
+                .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId());
+      }
+      assertEquals(expectedBPServiceState, bpServiceState);
+
       for (File dir : dirs) {
         FileUtil.chmod(dir.toString(), "755");
       }
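As a rough usage sketch (hypothetical test, not part of the patch), the relocated check can be exercised end to end through MiniDFSCluster; assuming the DiskErrorException propagates out of cluster startup as it does from restartDatanodes in the test above, an out-of-range value should now surface at DataNode startup:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.test.GenericTestUtils;
    import org.apache.hadoop.util.DiskChecker.DiskErrorException;
    import org.junit.Test;
    import static org.junit.Assert.fail;

    public class TestInvalidTolerationSketch {
      @Test
      public void testStartupRejectsOutOfRangeValue() throws Exception {
        Configuration conf = new HdfsConfiguration();
        // -2 is below the valid range [0, volsConfigured - 1].
        conf.setInt(
            DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, -2);
        MiniDFSCluster cluster = null;
        try {
          cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
          fail("DataNode startup should have been rejected");
        } catch (DiskErrorException e) {
          GenericTestUtils.assertExceptionContains("Invalid value configured "
              + "for dfs.datanode.failed.volumes.tolerated", e);
        } finally {
          if (cluster != null) {
            cluster.shutdown();
          }
        }
      }
    }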