Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id D1EE7200C29 for ; Tue, 28 Feb 2017 22:26:37 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id CEE41160B7C; Tue, 28 Feb 2017 21:26:37 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 80FC4160B59 for ; Tue, 28 Feb 2017 22:26:36 +0100 (CET) Received: (qmail 26728 invoked by uid 500); 28 Feb 2017 21:26:35 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 26719 invoked by uid 99); 28 Feb 2017 21:26:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 28 Feb 2017 21:26:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 75CD0DFE1E; Tue, 28 Feb 2017 21:26:35 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jianhe@apache.org To: common-commits@hadoop.apache.org Message-Id: <43c883fa4f724227920338dd66ce4120@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: YARN-6153. KeepContainer does not work when AM retry window is set. Contributed by kyungwan nam Date: Tue, 28 Feb 2017 21:26:35 +0000 (UTC) archived-at: Tue, 28 Feb 2017 21:26:38 -0000 Repository: hadoop Updated Branches: refs/heads/trunk e0bb867c3 -> 235203dff YARN-6153. KeepContainer does not work when AM retry window is set. Contributed by kyungwan nam Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/235203df Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/235203df Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/235203df Branch: refs/heads/trunk Commit: 235203dffda1482fb38762fde544c4dd9c3e1fa8 Parents: e0bb867 Author: Jian He Authored: Tue Feb 28 13:23:36 2017 -0800 Committer: Jian He Committed: Tue Feb 28 13:23:36 2017 -0800 ---------------------------------------------------------------------- .../server/resourcemanager/rmapp/RMAppImpl.java | 17 +--- .../rmapp/attempt/RMAppAttemptImpl.java | 46 +++++------ .../yarn/server/resourcemanager/MockRM.java | 6 +- .../resourcemanager/TestClientRMService.java | 4 +- .../applicationsmanager/TestAMRestart.java | 84 +++++++++++++++++--- .../TestRMAppAttemptImplDiagnostics.java | 2 +- .../attempt/TestRMAppAttemptTransitions.java | 19 +++-- 7 files changed, 112 insertions(+), 66 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 516109b..9f00b2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -994,13 +994,7 @@ public class RMAppImpl implements RMApp, Recoverable { } RMAppAttempt attempt = new RMAppAttemptImpl(appAttemptId, rmContext, scheduler, masterService, - submissionContext, conf, - // The newly created attempt maybe last attempt if (number of - // previously failed attempts(which should not include Preempted, - // hardware error and NM resync) + 1) equal to the max-attempt - // limit. - maxAppAttempts == (getNumFailedAppAttempts() + 1), amReq, - currentAMBlacklistManager); + submissionContext, conf, amReq, this, currentAMBlacklistManager); attempts.put(appAttemptId, attempt); currentAttempt = attempt; } @@ -1498,18 +1492,13 @@ public class RMAppImpl implements RMApp, Recoverable { }; } - private int getNumFailedAppAttempts() { + public int getNumFailedAppAttempts() { int completedAttempts = 0; - long endTime = this.systemClock.getTime(); // Do not count AM preemption, hardware failures or NM resync // as attempt failure. for (RMAppAttempt attempt : attempts.values()) { if (attempt.shouldCountTowardsMaxAttemptRetry()) { - if (this.attemptFailuresValidityInterval <= 0 - || (attempt.getFinishTime() > endTime - - this.attemptFailuresValidityInterval)) { - completedAttempts++; - } + completedAttempts++; } } return completedAttempts; http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 25138c5..5c0f48e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -143,6 +143,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private final EventHandler eventHandler; private final YarnScheduler scheduler; private final ApplicationMasterService masterService; + private final RMApp rmApp; private final ReadLock readLock; private final WriteLock writeLock; @@ -179,12 +180,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private int amContainerExitStatus = ContainerExitStatus.INVALID; private Configuration conf; - // Since AM preemption, hardware error and NM resync are not counted towards - // AM failure count, even if this flag is true, a new attempt can still be - // re-created if this attempt is eventually failed because of preemption, - // hardware error or NM resync. So this flag indicates that this may be - // last attempt. - private final boolean maybeLastAttempt; private static final ExpiredTransition EXPIRED_TRANSITION = new ExpiredTransition(); private static final AttemptFailedTransition FAILED_TRANSITION = @@ -490,16 +485,16 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMContext rmContext, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, - Configuration conf, boolean maybeLastAttempt, ResourceRequest amReq) { + Configuration conf, ResourceRequest amReq, RMApp rmApp) { this(appAttemptId, rmContext, scheduler, masterService, submissionContext, - conf, maybeLastAttempt, amReq, new DisabledBlacklistManager()); + conf, amReq, rmApp, new DisabledBlacklistManager()); } public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, RMContext rmContext, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, - Configuration conf, boolean maybeLastAttempt, ResourceRequest amReq, + Configuration conf, ResourceRequest amReq, RMApp rmApp, BlacklistManager amBlacklistManager) { this.conf = conf; this.applicationAttemptId = appAttemptId; @@ -514,7 +509,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { this.writeLock = lock.writeLock(); this.proxiedTrackingUrl = generateProxyUriWithScheme(); - this.maybeLastAttempt = maybeLastAttempt; this.stateMachine = stateMachineFactory.make(this); this.attemptMetrics = @@ -531,6 +525,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { } this.diagnostics = new BoundedAppender(diagnosticsLimitKC * 1024); + this.rmApp = rmApp; } private int getDiagnosticsLimitKCOrThrow(final Configuration configuration) { @@ -1215,8 +1210,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { - RMApp rmApp = appAttempt.rmContext.getRMApps().get( - appAttempt.getAppAttemptId().getApplicationId()); + RMApp rmApp = appAttempt.rmApp; /* * If last attempt recovered final state is null .. it means attempt was @@ -1462,14 +1456,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { if (appAttempt.submissionContext .getKeepContainersAcrossApplicationAttempts() && !appAttempt.submissionContext.getUnmanagedAM()) { - // See if we should retain containers for non-unmanaged applications - if (!appAttempt.shouldCountTowardsMaxAttemptRetry()) { - // Premption, hardware failures, NM resync doesn't count towards - // app-failures and so we should retain containers. - keepContainersAcrossAppAttempts = true; - } else if (!appAttempt.maybeLastAttempt) { - // Not preemption, hardware failures or NM resync. - // Not last-attempt too - keep containers. + int numberOfFailure = ((RMAppImpl)appAttempt.rmApp) + .getNumFailedAppAttempts(); + if (numberOfFailure < appAttempt.rmApp.getMaxAppAttempts()) { keepContainersAcrossAppAttempts = true; } } @@ -1496,9 +1485,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { .applicationAttemptFinished(appAttempt, finalAttemptState); appAttempt.rmContext.getSystemMetricsPublisher() .appAttemptFinished(appAttempt, finalAttemptState, - appAttempt.rmContext.getRMApps().get( - appAttempt.applicationAttemptId.getApplicationId()), - System.currentTimeMillis()); + appAttempt.rmApp, System.currentTimeMillis()); } } @@ -1545,6 +1532,14 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public boolean shouldCountTowardsMaxAttemptRetry() { + long attemptFailuresValidityInterval = this.submissionContext + .getAttemptFailuresValidityInterval(); + long end = System.currentTimeMillis(); + if (attemptFailuresValidityInterval > 0 + && this.getFinishTime() > 0 + && this.getFinishTime() < (end - attemptFailuresValidityInterval)) { + return false; + } try { this.readLock.lock(); int exitStatus = getAMContainerExitStatus(); @@ -2222,11 +2217,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { return attemptReport; } - // for testing - public boolean mayBeLastAttempt() { - return maybeLastAttempt; - } - @Override public RMAppAttemptMetrics getRMAppAttemptMetrics() { // didn't use read/write lock here because RMAppAttemptMetrics has its own http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 7d19dab..f9f42ad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -618,15 +618,15 @@ public class MockRM extends ResourceManager { false, null, 0, null, true, Priority.newInstance(0)); } - public RMApp submitApp(int masterMemory, long attemptFailuresValidityInterval) - throws Exception { + public RMApp submitApp(int masterMemory, long attemptFailuresValidityInterval, + boolean keepContainers) throws Exception { Resource resource = Records.newRecord(Resource.class); resource.setMemorySize(masterMemory); Priority priority = Priority.newInstance(0); return submitApp(resource, "", UserGroupInformation.getCurrentUser() .getShortUserName(), null, false, null, super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, - YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null, null, true, false, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null, null, true, keepContainers, false, null, attemptFailuresValidityInterval, null, true, priority); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 9076ac3..7a67aa8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -396,7 +396,7 @@ public class TestClientRMService { mock(ApplicationSubmissionContext.class); YarnConfiguration config = new YarnConfiguration(); RMAppAttemptImpl rmAppAttemptImpl = new RMAppAttemptImpl(attemptId, - rmContext, yarnScheduler, null, asContext, config, false, null); + rmContext, yarnScheduler, null, asContext, config, null, null); ApplicationResourceUsageReport report = rmAppAttemptImpl .getApplicationResourceUsageReport(); assertEquals(report, RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT); @@ -1327,7 +1327,7 @@ public class TestClientRMService { ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( ApplicationId.newInstance(123456, 1), 1); RMAppAttemptImpl rmAppAttemptImpl = spy(new RMAppAttemptImpl(attemptId, - rmContext, yarnScheduler, null, asContext, config, false, null)); + rmContext, yarnScheduler, null, asContext, config, null, app)); Container container = Container.newInstance( ContainerId.newContainerId(attemptId, 1), null, "", null, null, null); RMContainerImpl containerimpl = spy(new RMContainerImpl(container, http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java index 03a9645..4fa8287 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java @@ -411,7 +411,6 @@ public class TestAMRestart { MockAM am2 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1); RMAppAttempt attempt2 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt()); // Preempt the second attempt. ContainerId amContainer2 = @@ -427,7 +426,6 @@ public class TestAMRestart { MockAM am3 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1); RMAppAttempt attempt3 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt()); // mimic NM disk_failure ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); @@ -454,7 +452,6 @@ public class TestAMRestart { MockAM am4 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1); RMAppAttempt attempt4 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt4).mayBeLastAttempt()); // create second NM, and register to rm1 MockNM nm2 = @@ -475,7 +472,6 @@ public class TestAMRestart { MockAM am5 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 5, nm2); RMAppAttempt attempt5 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt5).mayBeLastAttempt()); // fail the AM normally nm2 .nodeHeartbeat(am5.getApplicationAttemptId(), 1, ContainerState.COMPLETE); @@ -584,7 +580,6 @@ public class TestAMRestart { // AM should be restarted even though max-am-attempt is 1. MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt1).mayBeLastAttempt()); // Restart rm. MockRM rm2 = new MockRM(conf, memStore); @@ -645,7 +640,7 @@ public class TestAMRestart { // set window size to a larger number : 60s // we will verify the app should be failed if // two continuous attempts failed in 60s. - RMApp app = rm1.submitApp(200, 60000); + RMApp app = rm1.submitApp(200, 60000, false); MockAM am = MockRM.launchAM(app, rm1, nm1); // Fail current attempt normally @@ -655,8 +650,7 @@ public class TestAMRestart { // launch the second attempt rm1.waitForState(app.getApplicationId(), RMAppState.ACCEPTED); Assert.assertEquals(2, app.getAppAttempts().size()); - Assert.assertTrue(((RMAppAttemptImpl) app.getCurrentAppAttempt()) - .mayBeLastAttempt()); + MockAM am_2 = MockRM.launchAndRegisterAM(app, rm1, nm1); rm1.waitForState(am_2.getApplicationAttemptId(), RMAppAttemptState.RUNNING); nm1.nodeHeartbeat(am_2.getApplicationAttemptId(), @@ -667,7 +661,7 @@ public class TestAMRestart { ControlledClock clock = new ControlledClock(); // set window size to 10s - RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 10000); + RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 10000, false); app1.setSystemClock(clock); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); @@ -684,7 +678,6 @@ public class TestAMRestart { Assert.assertEquals(2, app1.getAppAttempts().size()); RMAppAttempt attempt2 = app1.getCurrentAppAttempt(); - Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt()); MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1); rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING); @@ -863,4 +856,75 @@ public class TestAMRestart { rm1.stop(); } + + // Test restarting AM launched with the KeepContainers and AM reset window. + // after AM reset window, even if AM who was the last is failed, + // all containers are launched by previous AM should be kept. + @Test (timeout = 20000) + public void testAMRestartNotLostContainerAfterAttemptFailuresValidityInterval() + throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + // explicitly set max-am-retry count as 2. + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); + + MockRM rm1 = new MockRM(conf); + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService()); + nm1.registerNode(); + + // set window size to 10s and enable keepContainers + RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 10000, true); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + int NUM_CONTAINERS = 2; + allocateContainers(nm1, am1, NUM_CONTAINERS); + + // launch the 2nd container, for testing running container transferred. + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING); + + // Fail attempt1 normally + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), + 1, ContainerState.COMPLETE); + rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED); + + // launch the second attempt + rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); + Assert.assertEquals(2, app1.getAppAttempts().size()); + + // It will be the last attempt. + RMAppAttempt attempt2 = app1.getCurrentAppAttempt(); + MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING); + + // wait for 10 seconds to reset AM failure count + Thread.sleep(10 * 1000); + + // Fail attempt2 normally + nm1.nodeHeartbeat(am2.getApplicationAttemptId(), + 1, ContainerState.COMPLETE); + rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED); + + // can launch the third attempt successfully + rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); + Assert.assertEquals(3, app1.getAppAttempts().size()); + MockAM am3 = rm1.launchAM(app1, rm1, nm1); + RegisterApplicationMasterResponse registerResponse = + am3.registerAppAttempt(); + + // keepContainers is applied, even if attempt2 was the last attempt. + Assert.assertEquals(1, registerResponse.getContainersFromPreviousAttempts() + .size()); + boolean containerId2Exists = false; + Container container = registerResponse.getContainersFromPreviousAttempts().get(0); + if (container.getId().equals(containerId2)) { + containerId2Exists = true; + } + Assert.assertTrue(containerId2Exists); + + rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + rm1.stop(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptImplDiagnostics.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptImplDiagnostics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptImplDiagnostics.java index a160eb8..19b5dd9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptImplDiagnostics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptImplDiagnostics.java @@ -106,6 +106,6 @@ public class TestRMAppAttemptImplDiagnostics { when(mockRMContext.getDispatcher()).thenReturn(mockDispatcher); return new RMAppAttemptImpl(mockApplicationAttemptId, mockRMContext, null, - null, null, configuration, false, null); + null, null, configuration, null, null); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/235203df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 786cc50..ced5bd9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -327,10 +327,10 @@ public class TestRMAppAttemptTransitions { application = mock(RMAppImpl.class); applicationAttempt = new RMAppAttemptImpl(applicationAttemptId, spyRMContext, scheduler, - masterService, submissionContext, new Configuration(), false, + masterService, submissionContext, new Configuration(), BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1)); + submissionContext.getResource(), 1), application); when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); when(application.getApplicationId()).thenReturn(applicationId); @@ -1107,10 +1107,10 @@ public class TestRMAppAttemptTransitions { RMAppAttempt myApplicationAttempt = new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler,masterService, - submissionContext, myConf, false, + submissionContext, myConf, BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1)); + submissionContext.getResource(), 1), application); //submit, schedule and allocate app attempt myApplicationAttempt.handle( @@ -1536,6 +1536,9 @@ public class TestRMAppAttemptTransitions { // create a failed attempt. when(submissionContext.getKeepContainersAcrossApplicationAttempts()) .thenReturn(true); + when(application.getMaxAppAttempts()).thenReturn(2); + when(application.getNumFailedAppAttempts()).thenReturn(1); + Container amContainer = allocateApplicationAttempt(); launchApplicationAttempt(amContainer); runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); @@ -1581,9 +1584,9 @@ public class TestRMAppAttemptTransitions { applicationAttempt = new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, new Configuration(), - true, BuilderUtils.newResourceRequest( + BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1)); + submissionContext.getResource(), 1), application); when(submissionContext.getKeepContainersAcrossApplicationAttempts()) .thenReturn(true); when(submissionContext.getMaxAppAttempts()).thenReturn(1); @@ -1642,9 +1645,9 @@ public class TestRMAppAttemptTransitions { applicationAttempt = new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, - new Configuration(), true, ResourceRequest.newInstance( + new Configuration(), ResourceRequest.newInstance( Priority.UNDEFINED, "host1", Resource.newInstance(3333, 1), 3, - false, "label-expression")); + false, "label-expression"), application); new RMAppAttemptImpl.ScheduleTransition().transition( (RMAppAttemptImpl) applicationAttempt, null); } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org