hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rkan...@apache.org
Subject hadoop git commit: YARN-7382. NoSuchElementException in FairScheduler after failover causes RM crash (rkanter)
Date Tue, 24 Oct 2017 17:32:37 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 3d36f75f2 -> 1d34a4805


YARN-7382. NoSuchElementException in FairScheduler after failover causes RM crash (rkanter)

(cherry picked from commit 025c6565725c1819566377632753e8b9055617a6)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1d34a480
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1d34a480
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1d34a480

Branch: refs/heads/branch-2
Commit: 1d34a4805e0b5472bb039ae05cdb052e2976ca14
Parents: 3d36f75
Author: Robert Kanter <rkanter@apache.org>
Authored: Tue Oct 24 10:21:44 2017 -0700
Committer: Robert Kanter <rkanter@apache.org>
Committed: Tue Oct 24 10:29:36 2017 -0700

----------------------------------------------------------------------
 .../scheduler/fair/FSAppAttempt.java            | 10 ++++++++++
 .../TestWorkPreservingRMRestart.java            | 21 +++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1d34a480/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
index 006acea..21863b8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
@@ -665,6 +665,16 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
       if (!rmContainer.getState().equals(RMContainerState.COMPLETED)) {
         getQueue().incUsedResource(rmContainer.getContainer().getResource());
       }
+
+      // If not running unmanaged, the first container we recover is always
+      // the AM. Set the amResource for this app and update the leaf queue's AM
+      // usage
+      if (!isAmRunning() && !getUnmanagedAM()) {
+        Resource resource = rmContainer.getAllocatedResource();
+        setAMResource(resource);
+        getQueue().addAMResourceUsage(resource);
+        setAmRunning(true);
+      }
     } finally {
       writeLock.unlock();
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1d34a480/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
index eb73db1..59f6092 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSParentQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerTestBase;
@@ -154,6 +155,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
         new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
     nm1.registerNode();
     RMApp app1 = rm1.submitApp(200);
+    Resource amResources = app1.getAMResourceRequests().get(0).getCapability();
     MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
 
     // clear queue metrics
@@ -236,7 +238,8 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
     if (getSchedulerType() == SchedulerType.CAPACITY) {
       checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
     } else {
-      checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
+      checkFSQueue(rm2, schedulerApp, usedResources, availableResources,
+          amResources);
     }
 
     // *********** check scheduler attempt state.********
@@ -306,6 +309,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
     RMApp app1 = rm1.submitApp(200, "dynamicQApp",
         UserGroupInformation.getCurrentUser().getShortUserName(), null,
         ReservationSystemTestUtil.getReservationQueueName());
+    Resource amResources = app1.getAMResourceRequests().get(0).getCapability();
     MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
 
     // clear queue metrics
@@ -380,7 +384,8 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
     if (getSchedulerType() == SchedulerType.CAPACITY) {
       checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
     } else {
-      checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
+      checkFSQueue(rm2, schedulerApp, usedResources, availableResources,
+          amResources);
     }
 
     // *********** check scheduler attempt state.********
@@ -452,7 +457,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
 
   private void checkFSQueue(ResourceManager rm,
       SchedulerApplication  schedulerApp, Resource usedResources,
-      Resource availableResources) throws Exception {
+      Resource availableResources, Resource amResources) throws Exception {
     // waiting for RM's scheduling apps
     int retry = 0;
     Resource assumedFairShare = Resource.newInstance(8192, 8);
@@ -484,6 +489,16 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
     assertMetrics(queueMetrics, 1, 0, 1, 0, 2, availableResources.getMemorySize(),
         availableResources.getVirtualCores(), usedResources.getMemorySize(),
         usedResources.getVirtualCores());
+
+    // ************ check AM resources ****************
+    assertEquals(amResources,
+        schedulerApp.getCurrentAppAttempt().getAMResource());
+    FSQueueMetrics fsQueueMetrics =
+        (FSQueueMetrics) schedulerApp.getQueue().getMetrics();
+    assertEquals(amResources.getMemorySize(),
+        fsQueueMetrics.getAMResourceUsageMB());
+    assertEquals(amResources.getVirtualCores(),
+        fsQueueMetrics.getAMResourceUsageVCores());
   }
 
   // create 3 container reports for AM


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message