hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xg...@apache.org
Subject git commit: YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into DistributedShell. Contributed by Xuan Gong
Date Tue, 16 Sep 2014 18:03:06 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 9d34dc87e -> 03fdbd789


YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into
DistributedShell. Contributed by Xuan Gong


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/03fdbd78
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/03fdbd78
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/03fdbd78

Branch: refs/heads/branch-2
Commit: 03fdbd7899c78d13dd173487dbbd3da83712e2ac
Parents: 9d34dc8
Author: XuanGong <xgong@apache.org>
Authored: Tue Sep 16 11:01:26 2014 -0700
Committer: XuanGong <xgong@apache.org>
Committed: Tue Sep 16 11:01:26 2014 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +
 .../applications/distributedshell/Client.java   | 18 ++++-
 .../TestDSSleepingAppMaster.java                | 58 +++++++++++++++
 .../distributedshell/TestDistributedShell.java  | 76 ++++++++++++++++++++
 4 files changed, 154 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fdbd78/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 295bb9e..6697b2d 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -341,6 +341,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2529. Generic history service RPC interface doesn't work when service
     authorization is enabled. (Zhijie Shen via jianhe)
 
+    YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into
+    DistributedShell. (xgong)
+
 Release 2.5.1 - 2014-09-05
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fdbd78/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
index a86b521..f3ce64c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
@@ -75,7 +75,6 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.hadoop.yarn.util.Records;
 
 /**
  * Client for Distributed Shell application submission to YARN.
@@ -163,6 +162,8 @@ public class Client {
   // flag to indicate whether to keep containers across application attempts.
   private boolean keepContainers = false;
 
+  private long attemptFailuresValidityInterval = -1;
+
   // Debug flag
   boolean debugFlag = false;	
 
@@ -248,6 +249,12 @@ public class Client {
       " If the flag is true, running containers will not be killed when" +
       " application attempt fails and these containers will be retrieved by" +
       " the new application attempt ");
+    opts.addOption("attempt_failures_validity_interval", true,
+      "when attempt_failures_validity_interval in milliseconds is set to > 0," +
+      "the failure number will not take failures which happen out of " +
+      "the validityInterval into failure count. " +
+      "If failure count reaches to maxAppAttempts, " +
+      "the application will be failed.");
     opts.addOption("debug", false, "Dump out debug information");
     opts.addOption("help", false, "Print usage");
 
@@ -372,6 +379,10 @@ public class Client {
 
     clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000"));
 
+    attemptFailuresValidityInterval =
+        Long.parseLong(cliParser.getOptionValue(
+          "attempt_failures_validity_interval", "-1"));
+
     log4jPropFile = cliParser.getOptionValue("log_properties", "");
 
     return true;
@@ -456,6 +467,11 @@ public class Client {
     appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
     appContext.setApplicationName(appName);
 
+    if (attemptFailuresValidityInterval >= 0) {
+      appContext
+        .setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
+    }
+
     // set local resources for the application master
     // local files or archives as needed
     // In this scenario, the jar file for the application master is part of the local resources
		

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fdbd78/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
new file mode 100644
index 0000000..3004b69
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.applications.distributedshell;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class TestDSSleepingAppMaster extends ApplicationMaster{
+
+  private static final Log LOG = LogFactory.getLog(TestDSSleepingAppMaster.class);
+  private static final long SLEEP_TIME = 5000;
+
+  public static void main(String[] args) {
+    boolean result = false;
+    try {
+      TestDSSleepingAppMaster appMaster = new TestDSSleepingAppMaster();
+      boolean doRun = appMaster.init(args);
+      if (!doRun) {
+        System.exit(0);
+      }
+      appMaster.run();
+      if (appMaster.appAttemptID.getAttemptId() <= 2) {
+        try {
+          // sleep some time
+          Thread.sleep(SLEEP_TIME);
+        } catch (InterruptedException e) {}
+        // fail the first two AM attempts (attempt ids 1 and 2).
+        System.exit(100);
+      }
+      result = appMaster.finish();
+    } catch (Throwable t) {
+      System.exit(1);
+    }
+    if (result) {
+      LOG.info("Application Master completed successfully. exiting");
+      System.exit(0);
+    } else {
+      LOG.info("Application Master failed. exiting");
+      System.exit(2);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fdbd78/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
index d7a1745..6dff94c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
@@ -308,6 +308,82 @@ public class TestDistributedShell {
       Assert.assertTrue(result);
     }
 
+  /*
+   * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
+   * Set attempt_failures_validity_interval to 2.5 seconds, so that only
+   * attempt failures within the previous 2.5 seconds are counted.
+   * The application is expected to succeed.
+   */
+  @Test(timeout=90000)
+  public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
+    String[] args = {
+        "--jar",
+        APPMASTER_JAR,
+        "--num_containers",
+        "1",
+        "--shell_command",
+        "sleep 8",
+        "--master_memory",
+        "512",
+        "--container_memory",
+        "128",
+        "--attempt_failures_validity_interval",
+        "2500"
+      };
+
+      LOG.info("Initializing DS Client");
+      Configuration conf = yarnCluster.getConfig();
+      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+      Client client = new Client(TestDSSleepingAppMaster.class.getName(),
+        new Configuration(conf));
+
+      client.init(args);
+      LOG.info("Running DS Client");
+      boolean result = client.run();
+
+      LOG.info("Client run completed. Result=" + result);
+      // application should succeed
+      Assert.assertTrue(result);
+    }
+
+  /*
+   * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
+   * Set attempt_failures_validity_interval to 15 seconds, so that
+   * attempt failures within the previous 15 seconds are counted.
+   * The application is expected to fail.
+   */
+  @Test(timeout=90000)
+  public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
+    String[] args = {
+        "--jar",
+        APPMASTER_JAR,
+        "--num_containers",
+        "1",
+        "--shell_command",
+        "sleep 8",
+        "--master_memory",
+        "512",
+        "--container_memory",
+        "128",
+        "--attempt_failures_validity_interval",
+        "15000"
+      };
+
+      LOG.info("Initializing DS Client");
+      Configuration conf = yarnCluster.getConfig();
+      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+      Client client = new Client(TestDSSleepingAppMaster.class.getName(),
+        new Configuration(conf));
+
+      client.init(args);
+      LOG.info("Running DS Client");
+      boolean result = client.run();
+
+      LOG.info("Client run completed. Result=" + result);
+      // application should be failed
+      Assert.assertFalse(result);
+    }
+
   @Test(timeout=90000)
   public void testDSShellWithCustomLogPropertyFile() throws Exception {
     final File basedir =


Mime
View raw message