hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jia...@apache.org
Subject hadoop git commit: YARN-6872. Ensure apps could run given NodeLabels are disabled post RM switchover/restart. Contributed by Sunil G
Date Tue, 01 Aug 2017 17:00:10 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 72bde1dad -> 4006cc266


YARN-6872. Ensure apps could run given NodeLabels are disabled post RM switchover/restart.
Contributed by Sunil G


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4006cc26
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4006cc26
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4006cc26

Branch: refs/heads/branch-2
Commit: 4006cc266bd45c31be2f68d1ca5688d14d3f3ce3
Parents: 72bde1d
Author: Jian He <jianhe@apache.org>
Authored: Tue Aug 1 09:56:33 2017 -0700
Committer: Jian He <jianhe@apache.org>
Committed: Tue Aug 1 10:00:04 2017 -0700

----------------------------------------------------------------------
 .../server/resourcemanager/RMAppManager.java    | 56 +-------------------
 .../scheduler/AbstractYarnScheduler.java        | 11 +++-
 .../scheduler/SchedulerUtils.java               | 17 ++++--
 .../server/resourcemanager/TestRMRestart.java   |  8 +--
 4 files changed, 29 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4006cc26/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index 11610ff..2ebea21 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -41,7 +41,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
-import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.security.AccessRequest;
@@ -63,7 +62,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 
@@ -335,36 +333,6 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
         createAndPopulateNewRMApp(appContext, appState.getSubmitTime(),
             appState.getUser(), true);
 
-    // If null amReq has been returned, check if it is the case that
-    // application has specified node label expression while node label
-    // has been disabled. Reject the recovery of this application if it
-    // is true and give clear message so that user can react properly.
-    if (!appContext.getUnmanagedAM() &&
-        (application.getAMResourceRequests() == null ||
-            application.getAMResourceRequests().isEmpty()) &&
-        !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
-      // check application submission context and see if am resource request
-      // or application itself contains any node label expression.
-      List<ResourceRequest> amReqsFromAppContext =
-          appContext.getAMContainerResourceRequests();
-      String labelExp =
-          (amReqsFromAppContext != null && !amReqsFromAppContext.isEmpty()) ?
-          amReqsFromAppContext.get(0).getNodeLabelExpression() : null;
-      if (labelExp == null) {
-        labelExp = appContext.getNodeLabelExpression();
-      }
-      if (labelExp != null &&
-          !labelExp.equals(RMNodeLabelsManager.NO_LABEL)) {
-        String message = "Failed to recover application " + appId
-            + ". NodeLabel is not enabled in cluster, but AM resource request "
-            + "contains a label expression.";
-        LOG.warn(message);
-        application.handle(
-            new RMAppEvent(appId, RMAppEventType.APP_REJECTED, message));
-        return;
-      }
-    }
-
     application.handle(new RMAppRecoverEvent(appId, rmState));
   }
 
@@ -384,28 +352,8 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
     }
 
     ApplicationId applicationId = submissionContext.getApplicationId();
-    List<ResourceRequest> amReqs = null;
-    try {
-      amReqs = validateAndCreateResourceRequest(submissionContext, isRecovery);
-    } catch (InvalidLabelResourceRequestException e) {
-      // This can happen if the application had been submitted and run
-      // with Node Label enabled but recover with Node Label disabled.
-      // Thus there might be node label expression in the application's
-      // resource requests. If this is the case, create RmAppImpl with
-      // null amReq and reject the application later with clear error
-      // message. So that the application can still be tracked by RM
-      // after recovery and user can see what's going on and react accordingly.
-      if (isRecovery &&
-          !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("AMResourceRequest is not created for " + applicationId
-              + ". NodeLabel is not enabled in cluster, but AM resource "
-              + "request contains a label expression.");
-        }
-      } else {
-        throw e;
-      }
-    }
+    List<ResourceRequest> amReqs = validateAndCreateResourceRequest(
+        submissionContext, isRecovery);
 
     // Verify and get the update application priority and set back to
     // submissionContext

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4006cc26/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index ed5bdde..30cd5ce 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
@@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstant
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -518,12 +519,20 @@ public abstract class AbstractYarnScheduler
     container.setVersion(status.getVersion());
     ApplicationAttemptId attemptId =
         container.getId().getApplicationAttemptId();
+    String labelExpression = status.getNodeLabelExpression();
+    // If NodeLabel is disabled but recovered container has label expression
+    // it's better to suppress that and treat it as the default label.
+    if (!status.getNodeLabelExpression().isEmpty() && !YarnConfiguration
+        .areNodeLabelsEnabled(rmContext.getYarnConfiguration())) {
+      labelExpression = RMNodeLabelsManager.NO_LABEL;
+    }
+
     RMContainer rmContainer =
         new RMContainerImpl(container,
             SchedulerRequestKey.extractFrom(container), attemptId,
             node.getNodeID(), applications.get(
             attemptId.getApplicationId()).getUser(), rmContext,
-            status.getCreationTime(), status.getNodeLabelExpression());
+            status.getCreationTime(), labelExpression);
     return rmContainer;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4006cc26/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
index c67f1ce..7b554db 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
@@ -22,6 +22,8 @@ import java.util.Set;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.conf.Configuration;
@@ -55,7 +57,9 @@ import org.apache.hadoop.yarn.util.resource.Resources;
 @Private
 @Unstable
 public class SchedulerUtils {
-  
+
+  private static final Log LOG = LogFactory.getLog(SchedulerUtils.class);
+
   private static final RecordFactory recordFactory = 
       RecordFactoryProvider.getRecordFactory(null);
 
@@ -200,9 +204,14 @@ public class SchedulerUtils {
       String labelExp = resReq.getNodeLabelExpression();
       if (!(RMNodeLabelsManager.NO_LABEL.equals(labelExp)
           || null == labelExp)) {
-        throw new InvalidLabelResourceRequestException(
-            "Invalid resource request, node label not enabled "
-                + "but request contains label expression");
+        String message = "NodeLabel is not enabled in cluster, but resource"
+            + " request contains a label expression.";
+        LOG.warn(message);
+        if (!isRecovery) {
+          throw new InvalidLabelResourceRequestException(
+              "Invalid resource request, node label not enabled "
+                  + "but request contains label expression");
+        }
       }
     }
     if (null == queueInfo) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4006cc26/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
index af4ef41..88562ca 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
@@ -2485,14 +2485,14 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase
{
       }
     };
 
-    // rm should successfully start with app1 loaded back in FAILED state
-    // due to node label not enabled but am resource request contains
-    // node label expression.
+    // rm should successfully start with app1 loaded back in SUCCESS state
+    // by pushing the app to run its am container on the default label and
+    // letting other containers run normally.
+
     try {
       rm2.start();
       Assert.assertTrue("RM start successfully", true);
       Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
-      rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
     } catch (Exception e) {
       LOG.debug("Exception on start", e);
       Assert.fail("RM should start without any issue");


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message