Return-Path: X-Original-To: apmail-hadoop-yarn-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-yarn-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4709210A0D for ; Mon, 24 Feb 2014 22:41:48 +0000 (UTC) Received: (qmail 81786 invoked by uid 500); 24 Feb 2014 22:41:47 -0000 Delivered-To: apmail-hadoop-yarn-commits-archive@hadoop.apache.org Received: (qmail 81722 invoked by uid 500); 24 Feb 2014 22:41:47 -0000 Mailing-List: contact yarn-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: yarn-commits@hadoop.apache.org Delivered-To: mailing list yarn-commits@hadoop.apache.org Received: (qmail 81714 invoked by uid 99); 24 Feb 2014 22:41:47 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 Feb 2014 22:41:46 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 Feb 2014 22:41:45 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 5E242238897A; Mon, 24 Feb 2014 22:41:25 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1571474 - in /hadoop/common/trunk/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/... Date: Mon, 24 Feb 2014 22:41:25 -0000 To: yarn-commits@hadoop.apache.org From: vinodkv@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140224224125.5E242238897A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: vinodkv Date: Mon Feb 24 22:41:24 2014 New Revision: 1571474 URL: http://svn.apache.org/r1571474 Log: YARN-1686. Fixed NodeManager to properly handle any errors during re-registration after a RESYNC and thus avoid hanging. Contributed by Rohith Sharma. Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1571474&r1=1571473&r2=1571474&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Mon Feb 24 22:41:24 2014 @@ -344,6 +344,10 @@ Release 2.4.0 - UNRELEASED YARN-1742. Fixed javadoc of configuration parameter DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION. (Akira Ajisaka via vinodkv) + YARN-1686. Fixed NodeManager to properly handle any errors during + re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via + vinodkv) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1571474&r1=1571473&r2=1571474&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Mon Feb 24 22:41:24 2014 @@ -224,11 +224,16 @@ public class NodeManager extends Composi new Thread() { @Override public void run() { - LOG.info("Notifying ContainerManager to block new container-requests"); - containerManager.setBlockNewContainerRequests(true); - LOG.info("Cleaning up running containers on resync"); - containerManager.cleanupContainersOnNMResync(); - ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); + try { + LOG.info("Notifying ContainerManager to block new container-requests"); + containerManager.setBlockNewContainerRequests(true); + LOG.info("Cleaning up running containers on resync"); + containerManager.cleanupContainersOnNMResync(); + ((NodeStatusUpdaterImpl) nodeStatusUpdater).rebootNodeStatusUpdater(); + } catch (YarnRuntimeException e) { + LOG.fatal("Error while rebooting NodeStatusUpdater.", e); + shutDown(); + } } }.start(); } Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java?rev=1571474&r1=1571473&r2=1571474&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java Mon Feb 24 22:41:24 2014 @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.conf.YarnC import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; @@ -65,6 +66,7 @@ public class TestNodeManagerResync { private FileContext localFS; private CyclicBarrier syncBarrier; private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false); + private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false); @Before public void setup() throws UnsupportedFileSystemException { @@ -137,6 +139,30 @@ public class TestNodeManagerResync { Assert.assertFalse(assertionFailedInThread.get()); nm.stop(); } + + @SuppressWarnings("unchecked") + @Test(timeout=10000) + public void testNMshutdownWhenResyncThrowException() throws IOException, + InterruptedException, YarnException { + NodeManager nm = new TestNodeManager3(); + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + Assert.assertEquals(1, ((TestNodeManager3) nm).getNMRegistrationCount()); + nm.getNMDispatcher().getEventHandler() + .handle(new NodeManagerEvent(NodeManagerEventType.RESYNC)); + + synchronized (isNMShutdownCalled) { + while (isNMShutdownCalled.get() == false) { + try { + isNMShutdownCalled.wait(); + } catch (InterruptedException e) { + } + } + } + + Assert.assertTrue("NM shutdown not called.",isNMShutdownCalled.get()); + } private YarnConfiguration createNMConfig() { YarnConfiguration conf = new YarnConfiguration(); @@ -322,4 +348,44 @@ public class TestNodeManagerResync { } } } + + class TestNodeManager3 extends NodeManager { + + private int registrationCount = 0; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new TestNodeStatusUpdaterImpl3(context, dispatcher, healthChecker, + metrics); + } + + public int getNMRegistrationCount() { + return registrationCount; + } + + @Override + protected void shutDown() { + synchronized (isNMShutdownCalled) { + isNMShutdownCalled.set(true); + isNMShutdownCalled.notify(); + } + } + + class TestNodeStatusUpdaterImpl3 extends MockNodeStatusUpdater { + + public TestNodeStatusUpdaterImpl3(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + } + + @Override + protected void registerWithRM() throws YarnException, IOException { + super.registerWithRM(); + registrationCount++; + if (registrationCount > 1) { + throw new YarnRuntimeException("Registration with RM failed."); + } + } + }} }