hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xg...@apache.org
Subject hadoop git commit: YARN-4087. Followup fixes after YARN-2019 regarding RM behavior when state-store error occurs. Contributed by Jian He
Date Tue, 08 Sep 2015 00:47:42 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 96b9455c6 -> b55fb0ac4


YARN-4087. Followup fixes after YARN-2019 regarding RM behavior when
state-store error occurs. Contributed by Jian He

(cherry picked from commit 9b78e6e33d8c117c1e909df414f20d9db56efe4b)
(cherry picked from commit a0b7ef15d0663076b65ae3f53271b54e42308bfb)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b55fb0ac
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b55fb0ac
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b55fb0ac

Branch: refs/heads/branch-2.7
Commit: b55fb0ac44c87c809c32de54654724b1374b66ae
Parents: 96b9455
Author: Xuan <xgong@apache.org>
Authored: Mon Sep 7 17:45:47 2015 -0700
Committer: Xuan <xgong@apache.org>
Committed: Mon Sep 7 17:47:32 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                      |  3 +++
 .../apache/hadoop/yarn/conf/YarnConfiguration.java   |  2 +-
 .../src/main/resources/yarn-default.xml              |  5 ++++-
 .../resourcemanager/recovery/RMStateStore.java       | 15 +++++++++------
 4 files changed, 17 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b55fb0ac/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index cc5a02f..5463d37 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -76,6 +76,9 @@ Release 2.7.2 - UNRELEASED
     YARN-4105. Capacity Scheduler headroom for DRF is wrong (Chang Li via
     jlowe)
 
+    YARN-4087. Followup fixes after YARN-2019 regarding RM behavior when
+    state-store error occurs. (Jian He via xgong)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b55fb0ac/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index f98db44..7b71be6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -402,7 +402,7 @@ public class YarnConfiguration extends Configuration {
   public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
 
   public static final String YARN_FAIL_FAST = YARN_PREFIX + "fail-fast";
-  public static final boolean DEFAULT_YARN_FAIL_FAST = true;
+  public static final boolean DEFAULT_YARN_FAIL_FAST = false;
 
   public static final String RM_FAIL_FAST = RM_PREFIX + "fail-fast";
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b55fb0ac/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 4827b9a..cb3ad97 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -298,9 +298,12 @@
 
   <property>
     <description>Should YARN fail fast if it encounters any errors.
+      This is a global config for all other components including RM,NM etc.
+      If no value is set for component-specific config (e.g yarn.resourcemanager.fail-fast),
+      this value will be the default.
     </description>
     <name>yarn.fail-fast</name>
-    <value>true</value>
+    <value>false</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b55fb0ac/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index 95977ea..071b5c6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
+import org.apache.hadoop.yarn.conf.HAUtil;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
@@ -857,18 +858,20 @@ public abstract class RMStateStore extends AbstractService {
    */
   protected void notifyStoreOperationFailed(Exception failureCause) {
     LOG.error("State store operation failed ", failureCause);
-    if (failureCause instanceof StoreFencedException) {
+    if (HAUtil.isHAEnabled(getConfig())) {
+      LOG.warn("State-store fenced ! Transitioning RM to standby");
       updateFencedState();
       Thread standByTransitionThread =
           new Thread(new StandByTransitionThread());
       standByTransitionThread.setName("StandByTransitionThread Handler");
       standByTransitionThread.start();
+    } else if (YarnConfiguration.shouldRMFailFast(getConfig())) {
+      LOG.fatal("Fail RM now due to state-store error!");
+      rmDispatcher.getEventHandler().handle(
+          new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED,
+              failureCause));
     } else {
-      if (YarnConfiguration.shouldRMFailFast(getConfig())) {
-        rmDispatcher.getEventHandler().handle(
-            new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED,
-                failureCause));
-      }
+      LOG.warn("Skip the state-store error.");
     }
   }
  


Mime
View raw message