Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id CF55518B21 for ; Mon, 12 Oct 2015 18:53:07 +0000 (UTC) Received: (qmail 39283 invoked by uid 500); 12 Oct 2015 18:52:09 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 39225 invoked by uid 500); 12 Oct 2015 18:52:09 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 39215 invoked by uid 99); 12 Oct 2015 18:52:09 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 12 Oct 2015 18:52:09 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 133E2E0218; Mon, 12 Oct 2015 18:52:09 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jianhe@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: YARN-4230. RM crashes with NPE when increasing container resource if there is no headroom left. Contributed by Meng Ding (cherry picked from commit 9849c8b3865c7c9c9be81ae0ef8f29caa1d5f881) Date: Mon, 12 Oct 2015 18:52:09 +0000 (UTC) Repository: hadoop Updated Branches: refs/heads/branch-2 146f297d7 -> ee1ce393f YARN-4230. RM crashes with NPE when increasing container resource if there is no headroom left. Contributed by Meng Ding (cherry picked from commit 9849c8b3865c7c9c9be81ae0ef8f29caa1d5f881) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ee1ce393 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ee1ce393 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ee1ce393 Branch: refs/heads/branch-2 Commit: ee1ce393f5fc8c7cf59cc441f234c41d28929009 Parents: 146f297 Author: Jian He Authored: Mon Oct 12 11:51:33 2015 -0700 Committer: Jian He Committed: Mon Oct 12 11:52:04 2015 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 + .../allocator/IncreaseContainerAllocator.java | 4 +- .../capacity/TestContainerResizing.java | 72 ++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee1ce393/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 83bd63c..d2d00e5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -874,6 +874,9 @@ Release 2.8.0 - UNRELEASED YARN-4201. AMBlacklist does not work for minicluster. (Jun Gong via zxu) + YARN-4230. RM crashes with NPE when increasing container resource if there is no headroom left. + (Meng Ding via jianhe) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee1ce393/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java index 9350adc..16cf6d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java @@ -323,8 +323,8 @@ public class IncreaseContainerAllocator extends AbstractContainerAllocator { } } - // We already allocated something - if (!assigned.getSkipped()) { + // We may have allocated something + if (assigned != null && !assigned.getSkipped()) { break; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee1ce393/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java index 23283f6..672af64 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java @@ -285,6 +285,78 @@ public class TestContainerResizing { } @Test + public void testIncreaseRequestWithNoHeadroomLeft() throws Exception { + /** + * Application has two containers running, try to increase one of them, the + * requested amount exceeds user's headroom for the queue. + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate 1 container + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + + // am1 asks to change container2 from 2GB to 8GB, which will exceed user + // limit + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId2, Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId2); + + /* Check reservation statuses */ + // Increase request should *NOT* be reserved as it exceeds user limit + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will *NOT* be updated + checkUsedResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + rm1.close(); + } + + @Test public void testExcessiveReservationWhenCancelIncreaseRequest() throws Exception { /**