hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wan...@apache.org
Subject hadoop git commit: YARN-5195. RM intermittently crashed with NPE while handling APP_ATTEMPT_REMOVED event when async-scheduling enabled in CapacityScheduler. (sandflee via wangda)
Date Wed, 27 Jul 2016 04:44:22 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 55fa19d57 -> 77e0b6d1b


YARN-5195. RM intermittently crashed with NPE while handling APP_ATTEMPT_REMOVED event when
async-scheduling enabled in CapacityScheduler. (sandflee via wangda)

(cherry picked from commit d62e121ffc0239e7feccc1e23ece92c5fac685f6)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/77e0b6d1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/77e0b6d1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/77e0b6d1

Branch: refs/heads/branch-2
Commit: 77e0b6d1bf94079fb01580924a34aaae56da869d
Parents: 55fa19d
Author: Wangda Tan <wangda@apache.org>
Authored: Tue Jul 26 21:22:59 2016 -0700
Committer: Wangda Tan <wangda@apache.org>
Committed: Tue Jul 26 21:36:37 2016 -0700

----------------------------------------------------------------------
 .../scheduler/capacity/CapacityScheduler.java   |  9 ++++-
 .../capacity/TestCapacityScheduler.java         | 40 ++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/77e0b6d1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index ee62a70..bedf455 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -1209,11 +1209,18 @@ public class CapacityScheduler extends
  }
 
   @VisibleForTesting
-  protected synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
+  public synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
     if (rmContext.isWorkPreservingRecoveryEnabled()
         && !rmContext.isSchedulerReadyForAllocatingContainers()) {
       return;
     }
+
+    if (!nodeTracker.exists(node.getNodeID())) {
+      LOG.info("Skipping scheduling as the node " + node.getNodeID() +
+          " has been removed");
+      return;
+    }
+
     // reset allocation and reservation stats before we start doing any work
     updateSchedulerHealth(lastNodeUpdateTime, node,
       new CSAssignment(Resources.none(), NodeType.NODE_LOCAL));

http://git-wip-us.apache.org/repos/asf/hadoop/blob/77e0b6d1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index fb021c0..d3567f5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -3375,4 +3375,44 @@ public class TestCapacityScheduler {
     Assert.assertEquals(availableResource.getMemorySize(), 0);
     Assert.assertEquals(availableResource.getVirtualCores(), 0);
   }
+
+  @Test
+  public void testSchedulingOnRemovedNode() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+        ResourceScheduler.class);
+    conf.setBoolean(
+        CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE,
+            false);
+
+    MockRM rm = new MockRM(conf);
+    rm.start();
+    RMApp app = rm.submitApp(100);
+    rm.drainEvents();
+
+    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10240, 10);
+    MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
+
+    //remove nm2 to keep am alive
+    MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10240, 10);
+
+    am.allocate(ResourceRequest.ANY, 2048, 1, null);
+
+    CapacityScheduler scheduler =
+        (CapacityScheduler) rm.getRMContext().getScheduler();
+    FiCaSchedulerNode node =
+        (FiCaSchedulerNode)
+            scheduler.getNodeTracker().getNode(nm2.getNodeId());
+    scheduler.handle(new NodeRemovedSchedulerEvent(
+        rm.getRMContext().getRMNodes().get(nm2.getNodeId())));
+    // schedulerNode is removed, try allocate a container
+    scheduler.allocateContainersToNode(node);
+
+    AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
+        new AppAttemptRemovedSchedulerEvent(
+            am.getApplicationAttemptId(),
+            RMAppAttemptState.FINISHED, false);
+    scheduler.handle(appRemovedEvent1);
+    rm.stop();
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message