hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From junping...@apache.org
Subject hadoop git commit: YARN-6872. Ensure apps could run given NodeLabels are disabled post RM switchover/restart. Contributed by Sunil G
Date Thu, 10 Aug 2017 04:06:06 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8.2 984a08895 -> d1996152b


YARN-6872. Ensure apps could run given NodeLabels are disabled post RM switchover/restart.
Contributed by Sunil G

(cherry picked from commit e84a3f43a1b114d85a3c60151c821b1ca753f81a)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d1996152
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d1996152
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d1996152

Branch: refs/heads/branch-2.8.2
Commit: d1996152bd35d7abd6763dd494f660cd3045bab9
Parents: 984a088
Author: Jian He <jianhe@apache.org>
Authored: Wed Aug 2 00:09:25 2017 -0700
Committer: Junping Du <junping_du@apache.org>
Committed: Wed Aug 9 21:03:41 2017 -0700

----------------------------------------------------------------------
 .../server/resourcemanager/RMAppManager.java    | 66 +++-----------------
 .../scheduler/AppSchedulingInfo.java            |  8 +--
 .../scheduler/SchedulerApplicationAttempt.java  |  2 +-
 .../scheduler/SchedulerUtils.java               | 17 +++--
 .../server/resourcemanager/TestRMRestart.java   |  8 +--
 5 files changed, 30 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1996152/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index ff0bebb..55e8be3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -17,11 +17,7 @@
  */
 package org.apache.hadoop.yarn.server.resourcemanager;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.LinkedList;
-import java.util.Map;
-
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -37,7 +33,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
-import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
@@ -56,11 +51,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.Map;
 
 /**
  * This class manages the list of applications for the resource manager. 
@@ -324,34 +321,6 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
         createAndPopulateNewRMApp(appContext, appState.getSubmitTime(),
             appState.getUser(), true);
 
-    // If null amReq has been returned, check if it is the case that
-    // application has specified node label expression while node label
-    // has been disabled. Reject the recovery of this application if it
-    // is true and give clear message so that user can react properly.
-    if (!appContext.getUnmanagedAM() &&
-        application.getAMResourceRequest() == null &&
-        !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
-      // check application submission context and see if am resource request
-      // or application itself contains any node label expression.
-      ResourceRequest amReqFromAppContext =
-          appContext.getAMContainerResourceRequest();
-      String labelExp = (amReqFromAppContext != null) ?
-          amReqFromAppContext.getNodeLabelExpression() : null;
-      if (labelExp == null) {
-        labelExp = appContext.getNodeLabelExpression();
-      }
-      if (labelExp != null &&
-          !labelExp.equals(RMNodeLabelsManager.NO_LABEL)) {
-        String message = "Failed to recover application " + appId
-            + ". NodeLabel is not enabled in cluster, but AM resource request "
-            + "contains a label expression.";
-        LOG.warn(message);
-        application.handle(
-            new RMAppEvent(appId, RMAppEventType.APP_REJECTED, message));
-        return;
-      }
-    }
-
     application.handle(new RMAppRecoverEvent(appId, rmState));
   }
 
@@ -368,28 +337,9 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
     }
     
     ApplicationId applicationId = submissionContext.getApplicationId();
-    ResourceRequest amReq = null;
-    try {
-      amReq = validateAndCreateResourceRequest(submissionContext, isRecovery);
-    } catch (InvalidLabelResourceRequestException e) {
-      // This can happen if the application had been submitted and run
-      // with Node Label enabled but recover with Node Label disabled.
-      // Thus there might be node label expression in the application's
-      // resource requests. If this is the case, create RmAppImpl with
-      // null amReq and reject the application later with clear error
-      // message. So that the application can still be tracked by RM
-      // after recovery and user can see what's going on and react accordingly.
-      if (isRecovery &&
-          !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("AMResourceRequest is not created for " + applicationId
-              + ". NodeLabel is not enabled in cluster, but AM resource "
-              + "request contains a label expression.");
-        }
-      } else {
-        throw e;
-      }
-    }
+
+    ResourceRequest amReq = validateAndCreateResourceRequest(
+        submissionContext, isRecovery);
 
     // Verify and get the update application priority and set back to
     // submissionContext

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1996152/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
index fc6c44f..c77dca5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
@@ -815,7 +815,8 @@ public class AppSchedulingInfo {
     this.placesBlacklistedByApp = appInfo.getBlackList();
   }
 
-  public synchronized void recoverContainer(RMContainer rmContainer) {
+  public synchronized void recoverContainer(RMContainer rmContainer,
+      String partition) {
     QueueMetrics metrics = queue.getMetrics();
     if (pending) {
       // If there was any container to recover, the application was
@@ -828,9 +829,8 @@ public class AppSchedulingInfo {
     if (rmContainer.getState().equals(RMContainerState.COMPLETED)) {
       return;
     }
-    metrics.allocateResources(rmContainer.getNodeLabelExpression(),
-        user, 1, rmContainer.getAllocatedResource(),
-      false);
+    metrics.allocateResources(partition, user, 1,
+        rmContainer.getAllocatedResource(), false);
   }
   
   public ResourceRequest cloneResourceRequest(ResourceRequest request) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1996152/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index b4a2898..28067be 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -779,7 +779,7 @@ public class SchedulerApplicationAttempt implements SchedulableEntity
{
   public synchronized void recoverContainer(SchedulerNode node,
       RMContainer rmContainer) {
     // recover app scheduling info
-    appSchedulingInfo.recoverContainer(rmContainer);
+    appSchedulingInfo.recoverContainer(rmContainer,  node.getPartition());
 
     if (rmContainer.getState().equals(RMContainerState.COMPLETED)) {
       return;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1996152/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
index c999e26..93134dd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
@@ -22,6 +22,8 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.conf.Configuration;
@@ -51,7 +53,9 @@ import org.apache.hadoop.yarn.util.resource.Resources;
 @Private
 @Unstable
 public class SchedulerUtils {
-  
+
+  private static final Log LOG = LogFactory.getLog(SchedulerUtils.class);
+
   private static final RecordFactory recordFactory = 
       RecordFactoryProvider.getRecordFactory(null);
 
@@ -230,9 +234,14 @@ public class SchedulerUtils {
       String labelExp = resReq.getNodeLabelExpression();
       if (!(RMNodeLabelsManager.NO_LABEL.equals(labelExp)
           || null == labelExp)) {
-        throw new InvalidLabelResourceRequestException(
-            "Invalid resource request, node label not enabled "
-                + "but request contains label expression");
+        String message = "NodeLabel is not enabled in cluster, but resource"
+            + " request contains a label expression.";
+        LOG.warn(message);
+        if (!isRecovery) {
+          throw new InvalidLabelResourceRequestException(
+              "Invalid resource request, node label not enabled "
+                  + "but request contains label expression");
+        }
       }
     }
     if (null == queueInfo) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1996152/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
index 76a3488..9ca16dd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
@@ -2397,14 +2397,14 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase
{
       }
     };
 
-    // rm should successfully start with app1 loaded back in FAILED state
-    // due to node label not enabled but am resource request contains
-    // node label expression.
+    // rm should successfully start with app1 loaded back in SUCCESS state
+    // by pushing app to run default label for am container and let other
+    // containers to run normally.
+
     try {
       rm2.start();
       Assert.assertTrue("RM start successfully", true);
       Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
-      rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
     } catch (Exception e) {
       LOG.debug("Exception on start", e);
       Assert.fail("RM should start without any issue");


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message