Return-Path: X-Original-To: apmail-myriad-commits-archive@minotaur.apache.org Delivered-To: apmail-myriad-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4ADFA18CBF for ; Sun, 13 Mar 2016 21:18:27 +0000 (UTC) Received: (qmail 47485 invoked by uid 500); 13 Mar 2016 21:18:27 -0000 Delivered-To: apmail-myriad-commits-archive@myriad.apache.org Received: (qmail 47451 invoked by uid 500); 13 Mar 2016 21:18:27 -0000 Mailing-List: contact commits-help@myriad.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@myriad.incubator.apache.org Delivered-To: mailing list commits@myriad.incubator.apache.org Received: (qmail 47442 invoked by uid 99); 13 Mar 2016 21:18:27 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 13 Mar 2016 21:18:27 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id ADFC2C0E39 for ; Sun, 13 Mar 2016 21:18:26 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -3.221 X-Spam-Level: X-Spam-Status: No, score=-3.221 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id wKPjBHbSBuSs for ; Sun, 13 Mar 2016 21:18:25 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with SMTP id 3A35E5F620 for ; Sun, 13 Mar 2016 21:18:25 +0000 (UTC) Received: (qmail 47428 invoked by uid 99); 13 Mar 2016 21:18:24 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 13 Mar 2016 21:18:24 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 439EFDFBDB; Sun, 13 Mar 2016 21:18:24 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: darinj@apache.org To: commits@myriad.incubator.apache.org Message-Id: <5e860875afb041d69cbbcf935b200cdf@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: incubator-myriad git commit: [Myriad 188] - NodeManager switch to UNHEALTHY causes NPE on ResourceManager. Date: Sun, 13 Mar 2016 21:18:24 +0000 (UTC) Repository: incubator-myriad Updated Branches: refs/heads/master 79ba4a5f0 -> 4bce035ec [Myriad 188] - NodeManager switch to UNHEALTHY causes NPE on ResourceManager. JIRA: [Myriad-188] https://issues.apache.org/jira/browse/MYRIAD-188 [Myriad-156] https://issues.apache.org/jira/browse/MYRIAD-156 Pull Request: Closes #62 Author: darinj darinj@apache.org Project: http://git-wip-us.apache.org/repos/asf/incubator-myriad/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-myriad/commit/4bce035e Tree: http://git-wip-us.apache.org/repos/asf/incubator-myriad/tree/4bce035e Diff: http://git-wip-us.apache.org/repos/asf/incubator-myriad/diff/4bce035e Branch: refs/heads/master Commit: 4bce035ec8feff66f4804cc121c3051f5ecff4fa Parents: 79ba4a5 Author: darinj Authored: Tue Mar 8 05:51:51 2016 -0500 Committer: darinj Committed: Sun Mar 13 17:03:54 2016 -0400 ---------------------------------------------------------------------- .../myriad/scheduler/fgs/YarnNodeCapacityManager.java | 12 +++++++++--- .../apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy | 7 +++++-- .../scheduler/fgs/YarnNodeCapacityManagerSpec.groovy | 5 +++-- 3 files changed, 17 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/4bce035e/myriad-scheduler/src/main/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManager.java ---------------------------------------------------------------------- diff --git a/myriad-scheduler/src/main/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManager.java b/myriad-scheduler/src/main/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManager.java index 1a5d185..1dee5fa 100644 --- a/myriad-scheduler/src/main/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManager.java +++ b/myriad-scheduler/src/main/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManager.java @@ -70,6 +70,7 @@ import org.slf4j.LoggerFactory; */ public class YarnNodeCapacityManager extends BaseInterceptor { private static final Logger LOGGER = LoggerFactory.getLogger(YarnNodeCapacityManager.class); + private final AbstractYarnScheduler yarnScheduler; private final RMContext rmContext; private final MyriadDriver myriadDriver; @@ -247,9 +248,14 @@ public class YarnNodeCapacityManager extends BaseInterceptor { rmNode.getTotalCapability().setVirtualCores(newCapacity.getVirtualCores()); LOGGER.debug("Setting capacity for node {} to {}", rmNode.getHostName(), newCapacity); // updates the scheduler with the new capacity for the NM. - // the event is handled by the scheduler asynchronously - rmContext.getDispatcher().getEventHandler().handle(new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption.newInstance( - rmNode.getTotalCapability(), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); + synchronized (yarnScheduler) { + if (yarnScheduler.getSchedulerNode(rmNode.getNodeID()) != null) { + yarnScheduler.updateNodeResource(rmNode, + ResourceOption.newInstance(rmNode.getTotalCapability(), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT)); + } else { + LOGGER.info("Yarn Scheduler doesn't have node {}, probably UNHEALTHY", rmNode.getNodeID()); + } + } } } http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/4bce035e/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy ---------------------------------------------------------------------- diff --git a/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy b/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy index 5b15e59..c769999 100644 --- a/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy +++ b/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/FGSTestBaseSpec.groovy @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSSchedulerNode import org.apache.hadoop.yarn.util.resource.Resources import org.apache.mesos.Protos import org.apache.mesos.SchedulerDriver @@ -65,6 +66,7 @@ class FGSTestBaseSpec extends Specification { def rmNodes = new ConcurrentHashMap() + RMNode getRMNode(int cpu, int mem, String host, Protos.SlaveID slaveId) { RMNode rmNode = MockNodes.newNodeInfo(0, Resources.createResource(mem, cpu), 0, host) if (rmNodes[rmNode.getNodeID()]) { @@ -80,11 +82,9 @@ class FGSTestBaseSpec extends Specification { SchedulerNode getSchedulerNode(RMNode rmNode) { SchedulerNode schedulerNode = new SchedulerNode(rmNode, false) { - @Override void reserveResource(SchedulerApplicationAttempt attempt, Priority priority, RMContainer container) { } - @Override void unreserveResource(SchedulerApplicationAttempt attempt) { } @@ -92,6 +92,7 @@ class FGSTestBaseSpec extends Specification { return schedulerNode } + /******************* RMContext Related ****************/ def publisher = Mock(SystemMetricsPublisher) {} @@ -143,6 +144,8 @@ class FGSTestBaseSpec extends Specification { AbstractYarnScheduler yarnScheduler = Mock(AbstractYarnScheduler) { getRMContainer(_ as ContainerId) >> { ContainerId cid -> fgsContainers.get(cid).rmContainer } + getSchedulerNode(_ as NodeId) >> { NodeId nodeId -> getSchedulerNode(rmNodes.get(nodeId)) } + updateNodeResource(_ as RMNode, _ as ResourceOption) >> { } } FGSContainer getFGSContainer(RMNode node, int cid, int cpu, int mem, ContainerState state) { http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/4bce035e/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManagerSpec.groovy ---------------------------------------------------------------------- diff --git a/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManagerSpec.groovy b/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManagerSpec.groovy index 57e6384..5d59c68 100644 --- a/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManagerSpec.groovy +++ b/myriad-scheduler/src/test/java/org/apache/myriad/scheduler/fgs/YarnNodeCapacityManagerSpec.groovy @@ -19,6 +19,8 @@ package org.apache.myriad.scheduler.fgs import org.apache.hadoop.yarn.api.records.ContainerState +import org.apache.hadoop.yarn.api.records.ResourceOption +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent import org.apache.hadoop.yarn.util.resource.Resources import org.apache.mesos.Protos @@ -115,7 +117,7 @@ class YarnNodeCapacityManagerSpec extends FGSTestBaseSpec { then: zeroNM.getTotalCapability().getMemory() == 2048 zeroNM.getTotalCapability().getVirtualCores() == 2 - 1 * rmContext.getDispatcher().getEventHandler().handle(_ as NodeResourceUpdateSchedulerEvent) + 1 * yarnScheduler.updateNodeResource( _ as RMNode, _ as ResourceOption) } YarnNodeCapacityManager getYarnNodeCapacityManager() { @@ -137,6 +139,5 @@ class YarnNodeCapacityManagerSpec extends FGSTestBaseSpec { def taskUtils = new TaskUtils(cfg) return new YarnNodeCapacityManager(registry, yarnScheduler, rmContext, myriadDriver, offerLifecycleManager, nodeStore, state, taskUtils) - } }