Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id EE92A1050D for ; Fri, 7 Jun 2013 20:02:18 +0000 (UTC) Received: (qmail 54745 invoked by uid 500); 7 Jun 2013 20:02:18 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 54706 invoked by uid 500); 7 Jun 2013 20:02:18 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 54698 invoked by uid 99); 7 Jun 2013 20:02:18 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 20:02:18 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 20:02:16 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 8FBB123888E3; Fri, 7 Jun 2013 20:01:57 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1490803 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ Date: Fri, 07 Jun 2013 20:01:57 -0000 To: hdfs-commits@hadoop.apache.org From: kihwal@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130607200157.8FBB123888E3@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kihwal Date: Fri Jun 7 20:01:55 2013 New 
Revision: 1490803 URL: http://svn.apache.org/r1490803 Log: HDFS-4832. Namenode doesn't change the number of missing blocks in safemode when DNs rejoin or leave. Contributed by Ravi Prakash. Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Jun 7 20:01:55 2013 @@ -3123,6 +3123,9 @@ Release 0.23.9 - UNRELEASED HDFS-4862. SafeModeInfo.isManual() returns true when resources are low even if it wasn't entered into manually (Ravi Prakash via kihwal) + HDFS-4832. 
Namenode doesn't change the number of missing blocks in + safemode when DNs rejoin or leave (Ravi Prakash via kihwal) + Release 0.23.8 - 2013-06-05 INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java Fri Jun 7 20:01:55 2013 @@ -2156,7 +2156,7 @@ assert storedBlock.findDatanode(dn) < 0 return storedBlock; } - // do not try to handle over/under-replicated blocks during safe mode + // do not try to handle over/under-replicated blocks during first safe mode if (!namesystem.isPopulatingReplQueues()) { return storedBlock; } Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Fri Jun 7 20:01:55 2013 @@ -1168,7 +1168,13 @@ public class DatanodeManager { 
heartbeatManager.updateHeartbeat(nodeinfo, capacity, dfsUsed, remaining, blockPoolUsed, xceiverCount, failedVolumes); - + + // If we are in safemode, do not send back any recovery / replication + // requests. Don't even drain the existing queue of work. + if(namesystem.isInSafeMode()) { + return new DatanodeCommand[0]; + } + //check lease recovery BlockInfoUnderConstruction[] blocks = nodeinfo .getLeaseRecoveryCommand(Integer.MAX_VALUE); Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java Fri Jun 7 20:01:55 2013 @@ -223,7 +223,7 @@ class HeartbeatManager implements Datano final DatanodeManager dm = blockManager.getDatanodeManager(); // It's OK to check safe mode w/o taking the lock here, we re-check // for safe mode after taking the lock before removing a datanode. - if (namesystem.isInSafeMode()) { + if (namesystem.isInStartupSafeMode()) { return; } boolean allAlive = false; @@ -252,7 +252,7 @@ class HeartbeatManager implements Datano // acquire the fsnamesystem lock, and then remove the dead node. 
namesystem.writeLock(); try { - if (namesystem.isInSafeMode()) { + if (namesystem.isInStartupSafeMode()) { return; } synchronized(this) { Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Jun 7 20:01:55 2013 @@ -4093,7 +4093,7 @@ public class FSNamesystem implements Nam * * @see SafeModeInfo */ - private SafeModeInfo(boolean resourcesLow) { + private SafeModeInfo(boolean resourcesLow, boolean isReplQueuesInited) { this.threshold = 1.5f; // this threshold can never be reached this.datanodeThreshold = Integer.MAX_VALUE; this.extension = Integer.MAX_VALUE; @@ -4102,6 +4102,7 @@ public class FSNamesystem implements Nam this.blockTotal = -1; this.blockSafe = -1; this.resourcesLow = resourcesLow; + this.initializedReplQueues = isReplQueuesInited; enter(); reportStatus("STATE* Safe mode is ON.", true); } @@ -4527,6 +4528,10 @@ public class FSNamesystem implements Nam && safeMode.isOn(); } + /** + * Check if replication queues are to be populated + * @return true when node is HAState.Active and not in the very first safemode + */ @Override public boolean isPopulatingReplQueues() { if (!shouldPopulateReplQueues()) { @@ -4657,7 +4662,7 @@ public class FSNamesystem implements Nam getEditLog().logSyncAll(); } if (!isInSafeMode()) { - safeMode = new SafeModeInfo(resourcesLow); + safeMode = new SafeModeInfo(resourcesLow, isPopulatingReplQueues()); return; } 
if (resourcesLow) { Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java?rev=1490803&r1=1490802&r2=1490803&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java Fri Jun 7 20:01:55 2013 @@ -34,9 +34,12 @@ import org.apache.hadoop.hdfs.DFSTestUti import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; +import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; +import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.junit.After; import org.junit.Test; import org.mockito.Mockito; +import org.mockito.internal.util.reflection.Whitebox; public class TestFSNamesystem { @@ -104,4 +107,39 @@ public class TestFSNamesystem { assertTrue("After entering safemode due to low resources FSNamesystem." 
+ "isInSafeMode still returned false", fsn.isInSafeMode()); } + + @Test + public void testReplQueuesActiveAfterStartupSafemode() throws IOException, InterruptedException{ + Configuration conf = new Configuration(); + + FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); + FSImage fsImage = Mockito.mock(FSImage.class); + Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); + + FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage); + FSNamesystem fsn = Mockito.spy(fsNamesystem); + + //Make shouldPopulateReplQueues return true + HAContext haContext = Mockito.mock(HAContext.class); + HAState haState = Mockito.mock(HAState.class); + Mockito.when(haContext.getState()).thenReturn(haState); + Mockito.when(haState.shouldPopulateReplQueues()).thenReturn(true); + Whitebox.setInternalState(fsn, "haContext", haContext); + + //Make NameNode.getNameNodeMetrics() not return null + NameNode.initMetrics(conf, NamenodeRole.NAMENODE); + + fsn.enterSafeMode(false); + assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode()); + assertTrue("Replication queues were being populated during very first " + + "safemode", !fsn.isPopulatingReplQueues()); + fsn.leaveSafeMode(); + assertTrue("FSNamesystem didn't leave safemode", !fsn.isInSafeMode()); + assertTrue("Replication queues weren't being populated even after leaving " + + "safemode", fsn.isPopulatingReplQueues()); + fsn.enterSafeMode(false); + assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode()); + assertTrue("Replication queues weren't being populated after entering " + + "safemode 2nd time", fsn.isPopulatingReplQueues()); + } }