Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4473017AFE for ; Fri, 6 Feb 2015 21:46:49 +0000 (UTC) Received: (qmail 56493 invoked by uid 500); 6 Feb 2015 21:46:34 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 55885 invoked by uid 500); 6 Feb 2015 21:46:34 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 54331 invoked by uid 99); 6 Feb 2015 21:46:33 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Feb 2015 21:46:33 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4CABBE07F2; Fri, 6 Feb 2015 21:46:33 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zhz@apache.org To: common-commits@hadoop.apache.org Date: Fri, 06 Feb 2015 21:46:58 -0000 Message-Id: <0925675d32d84bb99bf0690382f1f654@git.apache.org> In-Reply-To: <45882ce1749b480984b67e2725c7f039@git.apache.org> References: <45882ce1749b480984b67e2725c7f039@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [26/50] [abbrv] hadoop git commit: YARN-3101. In Fair Scheduler, fix canceling of reservations for exceeding max share (Anubhav Dhoot via Sandy Ryza) YARN-3101. 
In Fair Scheduler, fix canceling of reservations for exceeding max share (Anubhav Dhoot via Sandy Ryza) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/650542ed Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/650542ed Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/650542ed Branch: refs/heads/HDFS-EC Commit: 650542eda412944b09269455474dbb2e6e7c387b Parents: c3973f1 Author: Sandy Ryza Authored: Thu Feb 5 09:35:47 2015 -0800 Committer: Zhe Zhang Committed: Fri Feb 6 13:45:50 2015 -0800 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/FSAppAttempt.java | 4 + .../scheduler/fair/FairScheduler.java | 13 ++- .../scheduler/fair/TestFairScheduler.java | 112 +++++++++++++++---- 4 files changed, 108 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/650542ed/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 592ea54..936cdf4 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -482,6 +482,9 @@ Release 2.7.0 - UNRELEASED YARN-3058. Fix error message of tokens' activation delay configuration. (Yi Liu via ozawa) + YARN-3101. 
In Fair Scheduler, fix canceling of reservations for exceeding + max share (Anubhav Dhoot via Sandy Ryza) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/650542ed/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index b23ec3e..2cb0f0b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -530,6 +530,10 @@ public class FSAppAttempt extends SchedulerApplicationAttempt return container.getResource(); } else { + if (!FairScheduler.fitsInMaxShare(getQueue(), capability)) { + return Resources.none(); + } + // The desired container won't fit here, so reserve reserve(request.getPriority(), node, container, reserved); http://git-wip-us.apache.org/repos/asf/hadoop/blob/650542ed/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java ---------------------------------------------------------------------- diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 1ace604..2b59716 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1049,7 +1049,8 @@ public class FairScheduler extends FSQueue queue = reservedAppSchedulable.getQueue(); if (!reservedAppSchedulable.hasContainerForNode(reservedPriority, node) - || !fitInMaxShare(queue)) { + || !fitsInMaxShare(queue, + node.getReservedContainer().getReservedResource())) { // Don't hold the reservation if app can no longer use it LOG.info("Releasing reservation that cannot be satisfied for application " + reservedAppSchedulable.getApplicationAttemptId() @@ -1084,14 +1085,18 @@ public class FairScheduler extends updateRootQueueMetrics(); } - private boolean fitInMaxShare(FSQueue queue) { - if (Resources.fitsIn(queue.getResourceUsage(), queue.getMaxShare())) { + static boolean fitsInMaxShare(FSQueue queue, Resource + additionalResource) { + Resource usagePlusAddition = + Resources.add(queue.getResourceUsage(), additionalResource); + + if (!Resources.fitsIn(usagePlusAddition, queue.getMaxShare())) { return false; } FSQueue parentQueue = queue.getParent(); if (parentQueue != null) { - return fitInMaxShare(parentQueue); + return fitsInMaxShare(parentQueue, additionalResource); } return true; } 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/650542ed/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 5d079a3..c29dbfc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -784,19 +784,75 @@ public class TestFairScheduler extends FairSchedulerTestBase { } - @Test (timeout = 5000) - public void testContainerReservationNotExceedingQueueMax() throws Exception { + @Test (timeout = 500000) + public void testContainerReservationAttemptExceedingQueueMax() + throws Exception { conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); out.println(""); out.println(""); out.println(""); out.println(""); - out.println("1024mb,5vcores"); + out.println("2048mb,5vcores"); + out.println(""); + out.println(""); out.println("2048mb,10vcores"); out.println(""); + out.println(""); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add a node + RMNode node1 = + MockNodes + .newNodeInfo(1, 
Resources.createResource(3072, 5), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Queue 1 requests full capacity of the queue + createSchedulingRequest(2048, "queue1", "user1", 1); + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); + scheduler.handle(updateEvent); + + // Make sure queue 1 is allocated app capacity + assertEquals(2048, scheduler.getQueueManager().getQueue("queue1"). + getResourceUsage().getMemory()); + + // Now queue 2 requests likewise + createSchedulingRequest(1024, "queue2", "user2", 1); + scheduler.update(); + scheduler.handle(updateEvent); + + // Make sure queue 2 is allocated app capacity + assertEquals(1024, scheduler.getQueueManager().getQueue("queue2"). + getResourceUsage().getMemory()); + + ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1", "user1", 1); + scheduler.update(); + scheduler.handle(updateEvent); + + // Ensure the reservation does not get created as allocated memory of + // queue1 exceeds max + assertEquals(0, scheduler.getSchedulerApp(attId1). 
+ getCurrentReservation().getMemory()); + } + + @Test (timeout = 500000) + public void testContainerReservationNotExceedingQueueMax() throws Exception { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println(""); + out.println("3072mb,10vcores"); + out.println(""); out.println(""); - out.println("1024mb,5vcores"); out.println("2048mb,10vcores"); out.println(""); out.println(""); @@ -806,7 +862,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); - + // Add a node RMNode node1 = MockNodes @@ -825,7 +881,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { getResourceUsage().getMemory()); // Now queue 2 requests likewise - ApplicationAttemptId attId = createSchedulingRequest(1024, "queue2", "user2", 1); + createSchedulingRequest(1024, "queue2", "user2", 1); scheduler.update(); scheduler.handle(updateEvent); @@ -841,18 +897,34 @@ public class TestFairScheduler extends FairSchedulerTestBase { assertEquals(1024, scheduler.getSchedulerApp(attId1) .getCurrentReservation().getMemory()); - // Now remove app of queue2 - AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent( - attId, RMAppAttemptState.FINISHED, false); - scheduler.update(); - scheduler.handle(appRemovedEvent1); + // Exercise checks that reservation fits + scheduler.handle(updateEvent); + + // Ensure the reservation still exists as allocated memory of queue1 doesn't + // exceed max + assertEquals(1024, scheduler.getSchedulerApp(attId1). 
+ getCurrentReservation().getMemory()); + + // Now reduce max Resources of queue1 down to 2048 + out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println("<?xml version=\"1.0\"?>"); + out.println("<allocations>"); + out.println("<queue name=\"root\">"); + out.println("<queue name=\"queue1\">"); + out.println("<maxResources>2048mb,10vcores</maxResources>"); + out.println("</queue>"); + out.println("<queue name=\"queue2\">"); + out.println("<maxResources>2048mb,10vcores</maxResources>"); + out.println("</queue>"); + out.println("</queue>"); + out.println("</allocations>"); + out.close(); + + scheduler.reinitialize(conf, resourceManager.getRMContext()); - // Queue should have no apps - assertEquals(0, scheduler.getQueueManager().getQueue("queue2"). - getResourceUsage().getMemory()); - createSchedulingRequest(1024, "queue2", "user2", 1); scheduler.handle(updateEvent); + // Make sure allocated memory of queue1 doesn't exceed its maximum assertEquals(2048, scheduler.getQueueManager().getQueue("queue1"). getResourceUsage().getMemory()); @@ -2257,10 +2329,9 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.handle(updateEvent); assertEquals(1, app.getLiveContainers().size()); - // Reserved container should will be at higher priority, - // since old reservation cannot be satisfied + // Reserved container should still be at lower priority for (RMContainer container : app.getReservedContainers()) { - assertEquals(1, container.getReservedPriority().getPriority()); + assertEquals(2, container.getReservedPriority().getPriority()); } // Complete container @@ -2273,11 +2344,12 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.update(); scheduler.handle(updateEvent); - // Reserved container (at higher priority) should be run + // Reserved container (at lower priority) should be run Collection<RMContainer> liveContainers = app.getLiveContainers(); assertEquals(1, liveContainers.size()); for (RMContainer liveContainer : liveContainers) { - Assert.assertEquals(1, liveContainer.getContainer().getPriority().getPriority()); + Assert.assertEquals(2, liveContainer.getContainer().getPriority() + .getPriority()); } assertEquals(0, 
scheduler.getRootQueueMetrics().getAvailableMB()); assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());