Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DFFE11838B for ; Mon, 4 May 2015 20:04:38 +0000 (UTC) Received: (qmail 15725 invoked by uid 500); 4 May 2015 20:04:22 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 15364 invoked by uid 500); 4 May 2015 20:04:21 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 13480 invoked by uid 99); 4 May 2015 20:04:21 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 May 2015 20:04:21 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id EE30CE0984; Mon, 4 May 2015 20:04:20 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zjshen@apache.org To: common-commits@hadoop.apache.org Date: Mon, 04 May 2015 20:04:49 -0000 Message-Id: <1d5372d5a3f54f15b7f96e095bf29632@git.apache.org> In-Reply-To: <6c861728d79543b6973a8facd17148c6@git.apache.org> References: <6c861728d79543b6973a8facd17148c6@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [30/50] [abbrv] hadoop git commit: YARN-2893. AMLaucher: sporadic job failures due to EOFException in readTokenStorageStream. (Zhihai Xu via gera) YARN-2893. AMLaucher: sporadic job failures due to EOFException in readTokenStorageStream. (Zhihai Xu via gera) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9b14ff8d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9b14ff8d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9b14ff8d Branch: refs/heads/YARN-2928 Commit: 9b14ff8da473d9ff3bd1750bcf348a4af2d719d4 Parents: 41ef214 Author: Gera Shegalov Authored: Fri May 1 14:49:09 2015 -0700 Committer: Zhijie Shen Committed: Mon May 4 12:58:58 2015 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 + .../server/resourcemanager/RMAppManager.java | 36 +++++------ .../resourcemanager/amlauncher/AMLauncher.java | 11 +++- .../server/resourcemanager/TestAppManager.java | 60 ++++++++++++++++++ .../TestApplicationMasterLauncher.java | 64 ++++++++++++++++++++ 5 files changed, 153 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/9b14ff8d/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6c1a2e1..55bf9e4 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -351,6 +351,9 @@ Release 2.8.0 - UNRELEASED YARN-3564. Fix TestContainerAllocation.testAMContainerAllocationWhenDNSUnavailable fails randomly. (Jian He via wangda) + YARN-2893. AMLaucher: sporadic job failures due to EOFException in + readTokenStorageStream. (Zhihai Xu via gera) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/9b14ff8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index dcc2a64..d946a2f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -281,29 +281,29 @@ public class RMAppManager implements EventHandler, RMAppImpl application = createAndPopulateNewRMApp(submissionContext, submitTime, user, false); ApplicationId appId = submissionContext.getApplicationId(); - - if (UserGroupInformation.isSecurityEnabled()) { - try { + Credentials credentials = null; + try { + credentials = parseCredentials(submissionContext); + if (UserGroupInformation.isSecurityEnabled()) { this.rmContext.getDelegationTokenRenewer().addApplicationAsync(appId, - parseCredentials(submissionContext), - submissionContext.getCancelTokensWhenComplete(), + credentials, submissionContext.getCancelTokensWhenComplete(), application.getUser()); - } catch (Exception e) { - LOG.warn("Unable to parse credentials.", e); - // Sending APP_REJECTED is fine, since we assume that the - // RMApp is in NEW state and thus we haven't yet informed the - // scheduler about the existence of the application - assert application.getState() == RMAppState.NEW; + } else { + // Dispatcher is not yet started at this time, so these START events + // enqueued should be guaranteed to be first processed when dispatcher + // gets started. this.rmContext.getDispatcher().getEventHandler() - .handle(new RMAppRejectedEvent(applicationId, e.getMessage())); - throw RPCUtil.getRemoteException(e); + .handle(new RMAppEvent(applicationId, RMAppEventType.START)); } - } else { - // Dispatcher is not yet started at this time, so these START events - // enqueued should be guaranteed to be first processed when dispatcher - // gets started. + } catch (Exception e) { + LOG.warn("Unable to parse credentials.", e); + // Sending APP_REJECTED is fine, since we assume that the + // RMApp is in NEW state and thus we haven't yet informed the + // scheduler about the existence of the application + assert application.getState() == RMAppState.NEW; this.rmContext.getDispatcher().getEventHandler() - .handle(new RMAppEvent(applicationId, RMAppEventType.START)); + .handle(new RMAppRejectedEvent(applicationId, e.getMessage())); + throw RPCUtil.getRemoteException(e); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/9b14ff8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index b5021ca..f62ee50 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -28,6 +28,7 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataInputByteBuffer; import org.apache.hadoop.io.DataOutputBuffer; @@ -201,7 +202,9 @@ public class AMLauncher implements Runnable { return container; } - private void setupTokens( + @Private + @VisibleForTesting + protected void setupTokens( ContainerLaunchContext container, ContainerId containerID) throws IOException { Map environment = container.getEnvironment(); @@ -227,10 +230,12 @@ public class AMLauncher implements Runnable { } Credentials credentials = new Credentials(); DataInputByteBuffer dibb = new DataInputByteBuffer(); - if (container.getTokens() != null) { + ByteBuffer tokens = container.getTokens(); + if (tokens != null) { // TODO: Don't do this kind of checks everywhere. - dibb.reset(container.getTokens()); + dibb.reset(tokens); credentials.readTokenStorageStream(dibb); + tokens.rewind(); } // Add AMRMToken http://git-wip-us.apache.org/repos/asf/hadoop/blob/9b14ff8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index 5ebc68c..3db8b7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -21,6 +21,8 @@ package org.apache.hadoop.yarn.server.resourcemanager; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; @@ -33,6 +35,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.nio.ByteBuffer; import java.util.HashMap; import java.util.List; import java.util.concurrent.ConcurrentMap; @@ -479,6 +482,63 @@ public class TestAppManager{ getAppEventType()); } + @Test + public void testRMAppSubmitWithInvalidTokens() throws Exception { + // Setup invalid security tokens + DataOutputBuffer dob = new DataOutputBuffer(); + ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, + dob.getLength()); + asContext.getAMContainerSpec().setTokens(securityTokens); + try { + appMonitor.submitApplication(asContext, "test"); + Assert.fail("Application submission should fail because" + + " Tokens are invalid."); + } catch (YarnException e) { + // Exception is expected + Assert.assertTrue("The thrown exception is not" + + " java.io.EOFException", + e.getMessage().contains("java.io.EOFException")); + } + int timeoutSecs = 0; + while ((getAppEventType() == RMAppEventType.KILL) && + timeoutSecs++ < 20) { + Thread.sleep(1000); + } + Assert.assertEquals("app event type sent is wrong", + RMAppEventType.APP_REJECTED, getAppEventType()); + asContext.getAMContainerSpec().setTokens(null); + } + + @Test + public void testRMAppSubmitWithValidTokens() throws Exception { + // Setup valid security tokens + DataOutputBuffer dob = new DataOutputBuffer(); + Credentials credentials = new Credentials(); + credentials.writeTokenStorageToStream(dob); + ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, + dob.getLength()); + asContext.getAMContainerSpec().setTokens(securityTokens); + appMonitor.submitApplication(asContext, "test"); + RMApp app = rmContext.getRMApps().get(appId); + Assert.assertNotNull("app is null", app); + Assert.assertEquals("app id doesn't match", appId, + app.getApplicationId()); + Assert.assertEquals("app state doesn't match", RMAppState.NEW, + app.getState()); + verify(metricsPublisher).appACLsUpdated( + any(RMApp.class), any(String.class), anyLong()); + + // wait for event to be processed + int timeoutSecs = 0; + while ((getAppEventType() == RMAppEventType.KILL) && + timeoutSecs++ < 20) { + Thread.sleep(1000); + } + Assert.assertEquals("app event type sent is wrong", RMAppEventType.START, + getAppEventType()); + asContext.getAMContainerSpec().setTokens(null); + } + @Test (timeout = 30000) public void testRMAppSubmitMaxAppAttempts() throws Exception { int[] globalMaxAppAttempts = new int[] { 10, 1 }; http://git-wip-us.apache.org/repos/asf/hadoop/blob/9b14ff8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 11cd1fd..9a4395e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -26,6 +26,9 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; @@ -38,6 +41,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.SerializedException; @@ -47,7 +51,10 @@ import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException; import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -238,4 +245,61 @@ public class TestApplicationMasterLauncher { } catch (ApplicationAttemptNotFoundException e) { } } + + @Test + public void testSetupTokens() throws Exception { + MockRM rm = new MockRM(); + rm.start(); + MockNM nm1 = rm.registerNode("h1:1234", 5000); + RMApp app = rm.submitApp(2000); + /// kick the scheduling + nm1.nodeHeartbeat(true); + RMAppAttempt attempt = app.getCurrentAppAttempt(); + MyAMLauncher launcher = new MyAMLauncher(rm.getRMContext(), + attempt, AMLauncherEventType.LAUNCH, rm.getConfig()); + DataOutputBuffer dob = new DataOutputBuffer(); + Credentials ts = new Credentials(); + ts.writeTokenStorageToStream(dob); + ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), + 0, dob.getLength()); + ContainerLaunchContext amContainer = + ContainerLaunchContext.newInstance(null, null, + null, null, securityTokens, null); + ContainerId containerId = ContainerId.newContainerId( + attempt.getAppAttemptId(), 0L); + + try { + launcher.setupTokens(amContainer, containerId); + } catch (Exception e) { + // ignore the first fake exception + } + try { + launcher.setupTokens(amContainer, containerId); + } catch (java.io.EOFException e) { + Assert.fail("EOFException should not happen."); + } + } + + static class MyAMLauncher extends AMLauncher { + int count; + public MyAMLauncher(RMContext rmContext, RMAppAttempt application, + AMLauncherEventType eventType, Configuration conf) { + super(rmContext, application, eventType, conf); + count = 0; + } + + protected org.apache.hadoop.security.token.Token + createAndSetAMRMToken() { + count++; + if (count == 1) { + throw new RuntimeException("createAndSetAMRMToken failure"); + } + return null; + } + + protected void setupTokens(ContainerLaunchContext container, + ContainerId containerID) throws IOException { + super.setupTokens(container, containerID); + } + } }