Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id C001D200D13 for ; Fri, 15 Sep 2017 18:56:29 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id BECF01609D1; Fri, 15 Sep 2017 16:56:29 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id DEBC31609D2 for ; Fri, 15 Sep 2017 18:56:28 +0200 (CEST) Received: (qmail 40936 invoked by uid 500); 15 Sep 2017 16:56:15 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 37180 invoked by uid 99); 15 Sep 2017 16:56:13 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Sep 2017 16:56:13 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D8788F5833; Fri, 15 Sep 2017 16:56:12 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: haibochen@apache.org To: common-commits@hadoop.apache.org Date: Fri, 15 Sep 2017 16:57:00 -0000 Message-Id: <34f1f973de6e424c9f700c536eeaf5e5@git.apache.org> In-Reply-To: <60195165f39d4b1f84823bfa91281ba2@git.apache.org> References: <60195165f39d4b1f84823bfa91281ba2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [50/50] [abbrv] hadoop git commit: YARN-6705 Add separate NM preemption thresholds for cpu and memory (Haibo Chen) archived-at: Fri, 15 Sep 2017 16:56:29 -0000 YARN-6705 Add separate NM preemption thresholds for cpu and memory (Haibo Chen) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo 
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6d9500c9 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6d9500c9 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6d9500c9 Branch: refs/heads/YARN-1011 Commit: 6d9500c9966a7c0366e4f192fac58c7eb1d523d4 Parents: 6ca0bc8 Author: Haibo Chen Authored: Wed Jul 12 12:32:13 2017 -0700 Committer: Haibo Chen Committed: Fri Sep 15 09:54:55 2017 -0700 ---------------------------------------------------------------------- .../hadoop/yarn/conf/YarnConfiguration.java | 31 +++++++++++++-- .../src/main/resources/yarn-default.xml | 34 ++++++++++++++-- .../monitor/ContainersMonitorImpl.java | 42 +++++++++++++------- 3 files changed, 85 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d9500c9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 9ecde42..f346db1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1741,10 +1741,33 @@ public class YarnConfiguration extends Configuration { public static final String NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD = NM_PREFIX + "overallocation.memory-utilization-threshold"; - public static final String NM_OVERALLOCATION_PREEMPTION_THRESHOLD = - NM_PREFIX + "overallocation.preemption-threshold"; - public static final float DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD - = 0.96f; + /** + * The CPU 
utilization threshold which, if exceeded a few times in a row, + * causes OPPORTUNISTIC containers started due to overallocation to start + * getting preempted. + */ + public static final String NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD = + NM_PREFIX + "overallocation.preemption-threshold.cpu"; + public static final float + DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD = 0.99f; + + /** + * The number of times that CPU utilization must go over the CPU preemption + * threshold consecutively before preemption starts to kick in. + */ + public static final String NM_OVERALLOCATION_PREEMPTION_CPU_COUNT = + NM_PREFIX + "overallocation.preemption-threshold-count.cpu"; + public static final int DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT = 4; + + + /** + * The memory utilization threshold beyond which OPPORTUNISTIC containers + * started due to overallocation should start getting preempted. + */ + public static final String NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD = + NM_PREFIX + "overallocation.preemption-threshold.memory"; + public static final float + DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD = 0.95f; /** * Interval of time the linux container executor should try cleaning up http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d9500c9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 20d4c1d..681e394 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1639,11 +1639,37 @@ When a node is over-allocated to improve utilization by - running OPPORTUNISTIC containers, this config captures the utilization - beyond 
which OPPORTUNISTIC containers should start getting preempted. + running OPPORTUNISTIC containers, this config captures the CPU + utilization beyond which OPPORTUNISTIC containers should start getting + preempted. This is used in combination with + yarn.nodemanager.overallocation.preemption-threshold-count.cpu, that is, + only when the CPU utilization goes over this threshold consecutively for + a few times will preemption kick in. - yarn.nodemanager.overallocation.preemption-threshold - 0.96 + yarn.nodemanager.overallocation.preemption-threshold.cpu + 0.99 + + + + When a node is over-allocated to improve utilization by + running OPPORTUNISTIC containers, this config captures the number of + times that CPU utilization has to go above + ${yarn.nodemanager.overallocation.preemption-threshold.cpu} + consecutively for NM to start preempting OPPORTUNISTIC containers + started due to overallocation. + + yarn.nodemanager.overallocation.preemption-threshold-count.cpu + 4 + + + + When a node is over-allocated to improve utilization by + running OPPORTUNISTIC containers, this config captures the memory + utilization beyond which OPPORTUNISTIC containers should start getting + preempted. 
+ + yarn.nodemanager.overallocation.preemption-threshold.memory + 0.95 http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d9500c9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 941997e..2c1e088 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -103,6 +103,7 @@ public class ContainersMonitorImpl extends AbstractService implements private ResourceUtilization containersUtilization; private ResourceThresholds overAllocationPreemptionThresholds; + private int overAlloctionPreemptionCpuCount = -1; private volatile boolean stopped = false; @@ -237,7 +238,7 @@ public class ContainersMonitorImpl extends AbstractService implements YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); if (overAllocationMemoryUtilizationThreshold <= 0) { LOG.info("NodeManager oversubscription is disabled because the memory " + - "utilization threshold is no larger than zero."); + "overallocation threshold is no larger than zero."); return; } @@ -249,36 +250,49 @@ public class ContainersMonitorImpl extends AbstractService implements 
YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); if (overAllocationCpuUtilizationThreshold <= 0) { LOG.info("NodeManager oversubscription is disabled because the CPU " + - "utilization threshold is no larger than zero."); + "overallocation threshold is no larger than zero."); return; } - float preemptionThreshold = conf.getFloat( - YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD, - YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD); - if (preemptionThreshold <= overAllocationCpuUtilizationThreshold) { - LOG.info("NodeManager oversubscription is disabled because preemption" + - "threshold is no larger than the cpu utilization threshold."); + float cpuPreemptionThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD, + YarnConfiguration. + DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD); + if (cpuPreemptionThreshold <= overAllocationCpuUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because the cpu " + + " preemption threshold is no larger than the cpu overallocation" + + " threshold."); return; } - if (preemptionThreshold <= overAllocationMemoryUtilizationThreshold) { - LOG.info("NodeManager oversubscription is disabled because preemption" + - "threshold is no larger than the memory utilization threshold."); + + float memoryPreemptionThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD, + YarnConfiguration. 
+ DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD); + if (memoryPreemptionThreshold <= overAllocationMemoryUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because the memory" + + " preemption threshold is no larger than the memory overallocation" + + " threshold."); return; } + this.overAlloctionPreemptionCpuCount = conf.getInt( + YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_CPU_COUNT, + YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT); + ResourceThresholds resourceThresholds = ResourceThresholds.newInstance( overAllocationCpuUtilizationThreshold, overAllocationMemoryUtilizationThreshold); ((NodeManager.NMContext) context).setOverAllocationInfo( OverAllocationInfo.newInstance(resourceThresholds)); - this.overAllocationPreemptionThresholds = - ResourceThresholds.newInstance(preemptionThreshold); + this.overAllocationPreemptionThresholds = ResourceThresholds.newInstance( + cpuPreemptionThreshold, memoryPreemptionThreshold); LOG.info("NodeManager oversubscription enabled with overallocation " + "thresholds (memory:" + overAllocationMemoryUtilizationThreshold + ", CPU:" + overAllocationCpuUtilizationThreshold + ") and preemption" + - " threshold: " + preemptionThreshold); + " threshold (memory:" + memoryPreemptionThreshold + ", CPU:" + + cpuPreemptionThreshold + ")"); } private boolean isResourceCalculatorAvailable() { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org