Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 11BBE200CFF for ; Tue, 22 Aug 2017 20:03:06 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 102A9163A14; Tue, 22 Aug 2017 18:03:06 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 05DC91639FF for ; Tue, 22 Aug 2017 20:03:04 +0200 (CEST) Received: (qmail 32981 invoked by uid 500); 22 Aug 2017 18:03:03 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 32970 invoked by uid 99); 22 Aug 2017 18:03:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 22 Aug 2017 18:03:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E0181F3261; Tue, 22 Aug 2017 18:03:02 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jlowe@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: YARN-2416. InvalidStateTransitonException in ResourceManager if AMLauncher does not receive response for startContainers() call in time. Contributed by Jonathan Eagles Date: Tue, 22 Aug 2017 18:03:02 +0000 (UTC) archived-at: Tue, 22 Aug 2017 18:03:06 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2.8 3f735ad64 -> 0b6477367 YARN-2416. InvalidStateTransitonException in ResourceManager if AMLauncher does not receive response for startContainers() call in time. Contributed by Jonathan Eagles (cherry picked from commit 3efcd51c3b3eb667d83e08b500bb7a7ea559fabe) Conflicts: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0b647736 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0b647736 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0b647736 Branch: refs/heads/branch-2.8 Commit: 0b64773678485dea7d961c07f2f014742dc0034b Parents: 3f735ad Author: Jason Lowe Authored: Tue Aug 22 12:56:09 2017 -0500 Committer: Jason Lowe Committed: Tue Aug 22 13:02:52 2017 -0500 ---------------------------------------------------------------------- .../rmapp/attempt/RMAppAttemptImpl.java | 25 ++++++++++++--- .../attempt/TestRMAppAttemptTransitions.java | 32 +++++++++++++------- 2 files changed, 41 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b647736/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 74de037..bfb5f28 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -180,7 +180,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new ExpiredTransition(); private static final AttemptFailedTransition FAILED_TRANSITION = new AttemptFailedTransition(); - + private static final AMRegisteredTransition REGISTERED_TRANSITION = + new AMRegisteredTransition(); + private static final AMLaunchedTransition LAUNCHED_TRANSITION = + new AMLaunchedTransition(); private RMAppAttemptEvent eventCausingFinalSaving; private RMAppAttemptState targetedFinalState; private RMAppAttemptState recoveredFinalState; @@ -310,7 +313,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Transitions from ALLOCATED State .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED, - RMAppAttemptEventType.LAUNCHED, new AMLaunchedTransition()) + RMAppAttemptEventType.LAUNCHED, LAUNCHED_TRANSITION) .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.LAUNCH_FAILED, new FinalSavingTransition(new LaunchFailedTransition(), @@ -324,6 +327,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.FAIL, new FinalSavingTransition(FAILED_TRANSITION, RMAppAttemptState.FAILED)) + .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.RUNNING, + RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION) .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.CONTAINER_FINISHED, new FinalSavingTransition( @@ -331,7 +336,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Transitions from LAUNCHED State .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING, - RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition()) + RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION) .addTransition(RMAppAttemptState.LAUNCHED, EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING), RMAppAttemptEventType.CONTAINER_FINISHED, @@ -353,6 +358,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptState.FAILED)) // Transitions from RUNNING State + .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING, + RMAppAttemptEventType.LAUNCHED) .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.UNREGISTERED, new AMUnregisteredTransition()) .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING, @@ -417,6 +424,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptState.FAILED, RMAppAttemptState.FAILED, EnumSet.of( + RMAppAttemptEventType.LAUNCHED, RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.KILL, RMAppAttemptEventType.FAIL, @@ -434,6 +442,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new FinalTransition(RMAppAttemptState.FINISHED)) .addTransition(RMAppAttemptState.FINISHING, RMAppAttemptState.FINISHING, EnumSet.of( + RMAppAttemptEventType.LAUNCHED, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.STATUS_UPDATE, RMAppAttemptEventType.CONTAINER_ALLOCATED, @@ -447,6 +456,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptState.FINISHED, RMAppAttemptState.FINISHED, EnumSet.of( + RMAppAttemptEventType.LAUNCHED, RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.CONTAINER_ALLOCATED, @@ -1226,7 +1236,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { * 2) OR AMLivelinessMonitor expires this attempt (when am doesn't * heart beat back). */ - (new AMLaunchedTransition()).transition(appAttempt, event); + LAUNCHED_TRANSITION.transition(appAttempt, event); return RMAppAttemptState.LAUNCHED; } } @@ -1459,7 +1469,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { - if (event.getType() == RMAppAttemptEventType.LAUNCHED) { + if (event.getType() == RMAppAttemptEventType.LAUNCHED + || event.getType() == RMAppAttemptEventType.REGISTERED) { appAttempt.launchAMEndTime = System.currentTimeMillis(); long delay = appAttempt.launchAMEndTime - appAttempt.launchAMStartTime; @@ -1586,6 +1597,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { + if (!RMAppAttemptState.LAUNCHED.equals(appAttempt.getState())) { + // registered received before launch + LAUNCHED_TRANSITION.transition(appAttempt, event); + } long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime; ClusterMetrics.getMetrics().addAMRegisterDelay(delay); RMAppAttemptRegistrationEvent registrationEvent http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b647736/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 9dd2b4c..378d1f9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -525,12 +525,9 @@ public class TestRMAppAttemptTransitions { verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); } - /** - * {@link RMAppAttemptState#LAUNCH} - */ - private void testAppAttemptLaunchedState(Container container) { - assertEquals(RMAppAttemptState.LAUNCHED, - applicationAttempt.getAppAttemptState()); + private void testAppAttemptLaunchedState(Container container, + RMAppAttemptState state) { + assertEquals(state, applicationAttempt.getAppAttemptState()); assertEquals(container, applicationAttempt.getMasterContainer()); if (UserGroupInformation.isSecurityEnabled()) { // ClientTokenMasterKey has been registered in SecretManager, it's able to @@ -685,13 +682,18 @@ public class TestRMAppAttemptTransitions { } private void launchApplicationAttempt(Container container) { + launchApplicationAttempt(container, RMAppAttemptState.LAUNCHED); + } + + private void launchApplicationAttempt(Container container, + RMAppAttemptState state) { applicationAttempt.handle( - new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(), + new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.LAUNCHED)); - testAppAttemptLaunchedState(container); + testAppAttemptLaunchedState(container, state); } - + private void runApplicationAttempt(Container container, String host, int rpcPort, @@ -722,7 +724,7 @@ public class TestRMAppAttemptTransitions { when(submissionContext.getUnmanagedAM()).thenReturn(true); // submit AM and check it goes to LAUNCHED state scheduleApplicationAttempt(); - testAppAttemptLaunchedState(null); + testAppAttemptLaunchedState(null, RMAppAttemptState.LAUNCHED); verify(amLivelinessMonitor, times(1)).register( applicationAttempt.getAppAttemptId()); @@ -929,7 +931,15 @@ public class TestRMAppAttemptTransitions { applicationAttempt.createApplicationAttemptState()); testAppAttemptFailedState(amContainer, diagnostics); } - + + @Test(timeout = 10000) + public void testAllocatedToRunning() { + Container amContainer = allocateApplicationAttempt(); + // Register attempt event arrives before launched attempt event + runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); + launchApplicationAttempt(amContainer, RMAppAttemptState.RUNNING); + } + @Test(timeout = 10000) public void testCreateAppAttemptReport() { RMAppAttemptState[] attemptStates = RMAppAttemptState.values(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org