From: kihwal@apache.org
To: common-commits@hadoop.apache.org
Subject: hadoop git commit: HDFS-11714. Newly added NN storage directory
 won't get initialized and cause space exhaustion. Contributed by Kihwal Lee.
 Added a CHANGES.txt entry (cherry picked from commit
 f77d138725662197ca0be874e9ec4dc430db2a4f)
Date: Mon, 1 May 2017 22:37:44 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 139327d34 -> 130a0e215


HDFS-11714. Newly added NN storage directory won't get initialized and
cause space exhaustion. Contributed by Kihwal Lee.
Added a CHANGES.txt entry

(cherry picked from commit f77d138725662197ca0be874e9ec4dc430db2a4f)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/130a0e21
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/130a0e21
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/130a0e21

Branch: refs/heads/branch-2.7
Commit: 130a0e2156861acf4f2eff2845643bc470672af2
Parents: 139327d
Author: Kihwal Lee <kihwal@apache.org>
Authored: Mon May 1 17:36:58 2017 -0500
Committer: Kihwal Lee <kihwal@apache.org>
Committed: Mon May 1 17:36:58 2017 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hadoop/hdfs/server/namenode/FSImage.java    | 52 +++++++++++++++++++-
 .../namenode/ha/TestStandbyCheckpoints.java     | 40 +++++++++++++++
 3 files changed, 94 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/130a0e21/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 6a74719..48a7546 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -256,6 +256,9 @@ Release 2.7.4 - UNRELEASED
     HDFS-11609. Some blocks can be permanently lost if nodes are
     decommissioned while dead. (kihwal)
 
+    HDFS-11714. Newly added NN storage directory won't get initialized
+    and cause space exhaustion. (kihwal)
+
 Release 2.7.3 - 2016-08-25
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/130a0e21/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index 0db4af2..da3d8be 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -101,6 +101,16 @@ public class FSImage implements Closeable {
 
   protected NNStorageRetentionManager archivalManager;
 
+  /**
+   * The collection of newly added storage directories. These are partially
+   * formatted, then later fully populated along with a VERSION file.
+   * For HA, the second part is done when the next checkpoint is saved.
+   * This set will be cleared once a VERSION file is created.
+   * For non-HA, a new fsimage will be locally generated along with a new
+   * VERSION file. This set is not used for non-HA mode.
+   */
+  private Set<StorageDirectory> newDirs = null;
+
   /* Used to make sure there are no concurrent checkpoints for a given txid
    * The checkpoint here could be one of the following operations.
    * a. checkpoint when NN is in standby.
@@ -265,9 +275,26 @@ public class FSImage implements Closeable {
         throw new IOException(StorageState.NON_EXISTENT +
                               " state cannot be here");
       case NOT_FORMATTED:
+        // Create a dir structure, but not the VERSION file. The presence of
+        // VERSION is checked in the inspector's needToSave() method and
+        // saveNamespace is triggered if it is absent. This will bring
+        // the storage state up to date along with a new VERSION file.
+        // If HA is enabled, NNs start up as standby, so saveNamespace is
+        // not triggered.
         LOG.info("Storage directory " + sd.getRoot() + " is not formatted.");
         LOG.info("Formatting ...");
         sd.clearDirectory(); // create empty current dir
+        // For non-HA, no further action is needed here, as saveNamespace
+        // will take care of the rest.
+        if (!target.isHaEnabled()) {
+          continue;
+        }
+        // If HA is enabled, save the dirs to create a version file later
+        // when a checkpoint image is saved.
+        if (newDirs == null) {
+          newDirs = new HashSet<StorageDirectory>();
+        }
+        newDirs.add(sd);
         break;
       default:
         break;
@@ -293,7 +320,27 @@ public class FSImage implements Closeable {
 
     return loadFSImage(target, startOpt, recovery);
   }
-  
+
+  /**
+   * Create a VERSION file in the newly added storage directories.
+   */
+  private void initNewDirs() {
+    if (newDirs == null) {
+      return;
+    }
+    for (StorageDirectory sd : newDirs) {
+      try {
+        storage.writeProperties(sd);
+        LOG.info("Wrote VERSION in the new storage, " + sd.getCurrentDir());
+      } catch (IOException e) {
+        // Failed to create a VERSION file. Report the error.
+        storage.reportErrorOnFile(sd.getVersionFile());
+      }
+    }
+    newDirs.clear();
+    newDirs = null;
+  }
+
   /**
    * For each storage directory, performs recovery of incomplete transitions
    * (eg. upgrade, rollback, checkpoint) and inserts the directory's storage
@@ -1399,6 +1446,9 @@ public class FSImage implements Closeable {
     if (txid > storage.getMostRecentCheckpointTxId()) {
       storage.setMostRecentCheckpointInfo(txid, Time.now());
     }
+
+    // Create a version file in any new storage directory.
+    initNewDirs();
   }
 
   @Override
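
The NOT_FORMATTED comments above hinge on one invariant: a storage
directory with an empty current/ but no current/VERSION is only partially
formatted, and on a non-HA NameNode the inspector's needToSave() sees the
missing VERSION and forces a full saveNamespace. A minimal sketch of that
invariant (illustrative only; the real check lives in the FSImage storage
inspector and is shaped differently):

    import java.io.File;

    class PartialFormatCheck {
      // clearDirectory() leaves an empty current/ behind; until a VERSION
      // file is written into it, the directory counts as partially
      // formatted and must trigger a full namespace save.
      static boolean needsFullSave(File storageRoot) {
        File currentDir = new File(storageRoot, "current");
        return currentDir.exists()
            && !new File(currentDir, "VERSION").exists();
      }
    }

On an HA standby that save never runs, which is why the diff instead
records such directories in newDirs and writes VERSION from initNewDirs()
once the next checkpoint image is saved.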

http://git-wip-us.apache.org/repos/asf/hadoop/blob/130a0e21/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index 6a0fcd7..3d56408 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.common.Util;
 import org.apache.hadoop.hdfs.server.namenode.*;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
 import org.apache.hadoop.hdfs.util.Canceler;
@@ -41,6 +42,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
+import org.apache.hadoop.test.PathUtils;
 import org.apache.hadoop.util.ThreadUtil;
 import org.junit.After;
 import org.junit.Before;
@@ -167,6 +169,44 @@ public class TestStandbyCheckpoints {
         purgeLogsOlderThan(Mockito.anyLong());
   }
 
+  @Test
+  public void testNewDirInitAfterCheckpointing() throws Exception {
+    File hdfsDir = new File(PathUtils.getTestDir(TestStandbyCheckpoints.class),
+        "testNewDirInitAfterCheckpointing");
+    File nameDir = new File(hdfsDir, "name1");
+    assert nameDir.mkdirs();
+
+    // Restart nn0 with an additional name dir.
+    String existingDir = cluster.getConfiguration(0).
+        get(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
+    cluster.getConfiguration(0).set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+        existingDir + "," + Util.fileAsURI(nameDir).toString());
+    cluster.restartNameNode(0);
+    nn0 = cluster.getNameNode(0);
+    cluster.transitionToActive(0);
+
+    // "current" is created, but current/VERSION isn't.
+    File currDir = new File(nameDir, "current");
+    File versionFile = new File(currDir, "VERSION");
+    assert currDir.exists();
+    assert !versionFile.exists();
+
+    // Trigger a checkpoint and upload.
+    doEdits(0, 10);
+    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+
+    // The VERSION file is created once a checkpoint image is uploaded.
+    // Wait up to 10 seconds for that to happen.
+    for (int i = 0; i < 20; i++) {
+      if (versionFile.exists()) {
+        break;
+      }
+      Thread.sleep(500);
+    }
+    // VERSION must have been created.
+    assert versionFile.exists();
+  }
+
   /**
    * Test for the case when both of the NNs in the cluster are
    * in the standby state, and thus are both creating checkpoints
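
A side note on the new test: the hand-rolled 20 x 500 ms polling loop can
also be expressed with GenericTestUtils.waitFor, which this test class
already imports. A sketch, assuming the Guava Supplier-based waitFor
overload is available on this branch and that versionFile is declared
final:

    // Poll every 500 ms, give up after 10 s; waitFor throws a
    // TimeoutException on expiry, making the trailing assert redundant.
    // Assumes: import com.google.common.base.Supplier;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        // Same condition as the loop: VERSION appears once the uploaded
        // checkpoint triggers initNewDirs() on the active NN.
        return versionFile.exists();
      }
    }, 500, 10000);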