Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A2299200D0B for ; Wed, 13 Sep 2017 01:11:01 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id A034F1609C8; Tue, 12 Sep 2017 23:11:01 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 980AB1609B4 for ; Wed, 13 Sep 2017 01:11:00 +0200 (CEST) Received: (qmail 47131 invoked by uid 500); 12 Sep 2017 23:10:59 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 47122 invoked by uid 99); 12 Sep 2017 23:10:59 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 12 Sep 2017 23:10:59 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 443B2F57F0; Tue, 12 Sep 2017 23:10:59 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: asuresh@apache.org To: common-commits@hadoop.apache.org Message-Id: <286a6a4677e840458825ef224ef6cae1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: YARN-7185. ContainerScheduler should only look at availableResource for GUARANTEED containers when OPPORTUNISTIC container queuing is enabled. (Wangda Tan via asuresh) Date: Tue, 12 Sep 2017 23:10:59 +0000 (UTC) archived-at: Tue, 12 Sep 2017 23:11:01 -0000 Repository: hadoop Updated Branches: refs/heads/trunk 86f4d1c66 -> 2ae72692f YARN-7185. ContainerScheduler should only look at availableResource for GUARANTEED containers when OPPORTUNISTIC container queuing is enabled. (Wangda Tan via asuresh) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2ae72692 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2ae72692 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2ae72692 Branch: refs/heads/trunk Commit: 2ae72692fc370267141a1ee55ef372ff62302b54 Parents: 86f4d1c Author: Arun Suresh Authored: Tue Sep 12 16:10:08 2017 -0700 Committer: Arun Suresh Committed: Tue Sep 12 16:10:08 2017 -0700 ---------------------------------------------------------------------- .../scheduler/ContainerScheduler.java | 45 ++++++--- .../containermanager/TestContainerManager.java | 2 +- ...ContainerSchedulerBehaviorCompatibility.java | 98 ++++++++++++++++++++ 3 files changed, 130 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ae72692/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java index 644bdae..7780f9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java @@ -238,6 +238,11 @@ public class ContainerScheduler extends AbstractService implements return this.queuedOpportunisticContainers.size(); } + @VisibleForTesting + public int getNumRunningContainers() { + return this.runningContainers.size(); + } + public OpportunisticContainersStatus getOpportunisticContainersStatus() { this.opportunisticContainersStatus.setQueuedOpportContainers( getNumQueuedOpportunisticContainers()); @@ -274,27 +279,32 @@ public class ContainerScheduler extends AbstractService implements ExecutionType.OPPORTUNISTIC) { this.metrics.completeOpportunisticContainer(container.getResource()); } - startPendingContainers(); + startPendingContainers(false); } } - private void startPendingContainers() { + /** + * Start pending containers in the queue. + * @param forceStartGuaranteedContaieners When this is true, start guaranteed + * container without looking at available resource + */ + private void startPendingContainers(boolean forceStartGuaranteedContaieners) { // Start pending guaranteed containers, if resources available. - boolean resourcesAvailable = - startContainersFromQueue(queuedGuaranteedContainers.values()); + boolean resourcesAvailable = startContainers( + queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners); // Start opportunistic containers, if resources available. if (resourcesAvailable) { - startContainersFromQueue(queuedOpportunisticContainers.values()); + startContainers(queuedOpportunisticContainers.values(), false); } } - private boolean startContainersFromQueue( - Collection queuedContainers) { - Iterator cIter = queuedContainers.iterator(); + private boolean startContainers( + Collection containersToBeStarted, boolean force) { + Iterator cIter = containersToBeStarted.iterator(); boolean resourcesAvailable = true; while (cIter.hasNext() && resourcesAvailable) { Container container = cIter.next(); - if (tryStartContainer(container)) { + if (tryStartContainer(container, force)) { cIter.remove(); } else { resourcesAvailable = false; @@ -303,9 +313,11 @@ public class ContainerScheduler extends AbstractService implements return resourcesAvailable; } - private boolean tryStartContainer(Container container) { + private boolean tryStartContainer(Container container, boolean force) { boolean containerStarted = false; - if (resourceAvailableToStartContainer(container)) { + // call startContainer without checking available resource when force==true + if (force || resourceAvailableToStartContainer( + container)) { startContainer(container); containerStarted = true; } @@ -373,7 +385,12 @@ public class ContainerScheduler extends AbstractService implements // enough number of opportunistic containers. if (isGuaranteedContainer) { enqueueContainer(container); - startPendingContainers(); + + // When opportunistic container not allowed (which is determined by + // max-queue length of pending opportunistic containers <= 0), start + // guaranteed containers without looking at available resources. + boolean forceStartGuaranteedContainers = (maxOppQueueLength <= 0); + startPendingContainers(forceStartGuaranteedContainers); // if the guaranteed container is queued, we need to preempt opportunistic // containers for make room for it @@ -386,12 +403,12 @@ public class ContainerScheduler extends AbstractService implements // containers based on remaining resource available, then enqueue the // opportunistic container. If the container is enqueued, we do another // pass to try to start the newly enqueued opportunistic container. - startPendingContainers(); + startPendingContainers(false); boolean containerQueued = enqueueContainer(container); // container may not get queued because the max opportunistic container // queue length is reached. If so, there is no point doing another pass if (containerQueued) { - startPendingContainers(); + startPendingContainers(false); } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ae72692/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index f379c08..6eea77b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -147,7 +147,7 @@ public class TestContainerManager extends BaseContainerManagerTest { @Before public void setup() throws IOException { conf.setInt( - YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 0); super.setup(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ae72692/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java new file mode 100644 index 0000000..c531cc8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Make sure CantainerScheduler related changes are compatible to old behaviors + */ +public class TestContainerSchedulerBehaviorCompatibility + extends BaseContainerManagerTest { + public TestContainerSchedulerBehaviorCompatibility() + throws UnsupportedFileSystemException { + super(); + } + + @Before + public void setup() throws IOException { + conf.setInt(YarnConfiguration.NM_VCORES, 1); + conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, + 0); + super.setup(); + } + + @Test + public void testForceStartGuaranteedContainersWhenOppContainerDisabled() + throws Exception { + containerManager.start(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + containerLaunchContext.setCommands(Arrays.asList("echo")); + + List list = new ArrayList<>(); + + // Add a container start request with #vcores > available (1). + // This could happen when DefaultContainerCalculator configured because + // on the RM side it won't check vcores at all. + list.add(StartContainerRequest.newInstance(containerLaunchContext, + createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, BuilderUtils.newResource(2048, 4), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + + StartContainersRequest allRequests = StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + ContainerScheduler cs = containerManager.getContainerScheduler(); + int nQueuedContainers = cs.getNumQueuedContainers(); + int nRunningContainers = cs.getNumRunningContainers(); + + // Wait at most 10 secs and we expect all containers finished. + int maxTry = 100; + int nTried = 1; + while (nQueuedContainers != 0 || nRunningContainers != 0) { + Thread.sleep(100); + nQueuedContainers = cs.getNumQueuedContainers(); + nRunningContainers = cs.getNumRunningContainers(); + nTried++; + if (nTried > maxTry) { + Assert.fail("Failed to get either number of queuing containers to 0 or " + + "number of running containers to 0, #queued=" + nQueuedContainers + + ", #running=" + nRunningContainers); + } + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org