hadoop-yarn-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vino...@apache.org
Subject svn commit: r1529390 - in /hadoop/common/branches/branch-2/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ hadoop-yarn/hadoop-yar...
Date Sat, 05 Oct 2013 06:10:00 GMT
Author: vinodkv
Date: Sat Oct  5 06:10:00 2013
New Revision: 1529390

URL: http://svn.apache.org/r1529390
Log:
YARN-1273. Fixed Distributed-shell to account for containers that failed to start. Contributed
by Hitesh Shah.
svn merge --ignore-ancestry -c 1529389 ../../trunk

Added:
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java
      - copied unchanged from r1529389, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java
Modified:
    hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1529390&r1=1529389&r2=1529390&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Sat Oct  5 06:10:00 2013
@@ -159,6 +159,9 @@ Release 2.1.2 - UNRELEASED
     YARN-1254. Fixed NodeManager to not pollute container's credentials. (Omkar
     Vinit Joshi via vinodkv)
 
+    YARN-1273. Fixed Distributed-shell to account for containers that failed
+    to start. (Hitesh Shah via vinodkv)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java?rev=1529390&r1=1529389&r2=1529390&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
Sat Oct  5 06:10:00 2013
@@ -34,6 +34,7 @@ import java.util.concurrent.ConcurrentHa
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -281,8 +282,8 @@ public class ApplicationMaster {
     }
   }
 
-  public ApplicationMaster() throws Exception {
-    // Set up the configuration and RPC
+  public ApplicationMaster() {
+    // Set up the configuration
     conf = new YarnConfiguration();
   }
 
@@ -470,7 +471,7 @@ public class ApplicationMaster {
     amRMClient.init(conf);
     amRMClient.start();
 
-    containerListener = new NMCallbackHandler();
+    containerListener = createNMCallbackHandler();
     nmClientAsync = new NMClientAsyncImpl(containerListener);
     nmClientAsync.init(conf);
     nmClientAsync.start();
@@ -500,7 +501,6 @@ public class ApplicationMaster {
       containerMemory = maxMem;
     }
 
-
     // Setup ask for containers from RM
     // Send request for containers to RM
     // Until we get our fully allocated quota, we keep on polling RM for
@@ -513,7 +513,8 @@ public class ApplicationMaster {
     }
     numRequestedContainers.set(numTotalContainers);
 
-    while (!done) {
+    while (!done
+        && (numCompletedContainers.get() != numTotalContainers)) {
       try {
         Thread.sleep(200);
       } catch (InterruptedException ex) {}
@@ -522,7 +523,12 @@ public class ApplicationMaster {
     
     return success;
   }
-  
+
+  @VisibleForTesting
+  NMCallbackHandler createNMCallbackHandler() {
+    return new NMCallbackHandler(this);
+  }
+
   private void finish() {
     // Join all launched threads
     // needed for when we time out
@@ -566,7 +572,6 @@ public class ApplicationMaster {
       LOG.error("Failed to unregister application", e);
     }
     
-    done = true;
     amRMClient.stop();
   }
   
@@ -679,10 +684,17 @@ public class ApplicationMaster {
     }
   }
 
-  private class NMCallbackHandler implements NMClientAsync.CallbackHandler {
+  @VisibleForTesting
+  static class NMCallbackHandler
+    implements NMClientAsync.CallbackHandler {
 
     private ConcurrentMap<ContainerId, Container> containers =
         new ConcurrentHashMap<ContainerId, Container>();
+    private final ApplicationMaster applicationMaster;
+
+    public NMCallbackHandler(ApplicationMaster applicationMaster) {
+      this.applicationMaster = applicationMaster;
+    }
 
     public void addContainer(ContainerId containerId, Container container) {
       containers.putIfAbsent(containerId, container);
@@ -713,7 +725,7 @@ public class ApplicationMaster {
       }
       Container container = containers.get(containerId);
       if (container != null) {
-        nmClientAsync.getContainerStatusAsync(containerId, container.getNodeId());
+        applicationMaster.nmClientAsync.getContainerStatusAsync(containerId, container.getNodeId());
       }
     }
 
@@ -721,6 +733,8 @@ public class ApplicationMaster {
     public void onStartContainerError(ContainerId containerId, Throwable t) {
       LOG.error("Failed to start Container " + containerId);
       containers.remove(containerId);
+      applicationMaster.numCompletedContainers.incrementAndGet();
+      applicationMaster.numFailedContainers.incrementAndGet();
     }
 
     @Override
@@ -847,7 +861,6 @@ public class ApplicationMaster {
   /**
    * Setup the request that will be sent to the RM for the container ask.
    *
-   * @param numContainers Containers to ask for from RM
    * @return the setup ResourceRequest to be sent to RM
    */
   private ContainerRequest setupContainerAskForRM() {

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java?rev=1529390&r1=1529389&r2=1529390&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
Sat Oct  5 06:10:00 2013
@@ -125,8 +125,7 @@ public class Client {
   // Application master jar file
   private String appMasterJar = ""; 
   // Main class to invoke application master
-  private final String appMasterMainClass =
-      "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster";
+  private final String appMasterMainClass;
 
   // Shell command to be executed 
   private String shellCommand = ""; 
@@ -193,8 +192,14 @@ public class Client {
   /**
    */
   public Client(Configuration conf) throws Exception  {
-    
+    this(
+      "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster",
+      conf);
+  }
+
+  Client(String appMasterMainClass, Configuration conf) {
     this.conf = conf;
+    this.appMasterMainClass = appMasterMainClass;
     yarnClient = YarnClient.createYarnClient();
     yarnClient.init(conf);
     opts = new Options();
@@ -214,6 +219,7 @@ public class Client {
     opts.addOption("log_properties", true, "log4j.properties file");
     opts.addOption("debug", false, "Dump out debug information");
     opts.addOption("help", false, "Print usage");
+
   }
 
   /**

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java?rev=1529390&r1=1529389&r2=1529390&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
Sat Oct  5 06:10:00 2013
@@ -59,7 +59,7 @@ public class TestDistributedShell {
   protected static String APPMASTER_JAR = JarFinder.getJar(ApplicationMaster.class);
 
   @BeforeClass
-  public static void setup() throws InterruptedException, Exception {
+  public static void setup() throws Exception {
     LOG.info("Starting up YARN cluster");
     conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
     conf.setClass(YarnConfiguration.RM_SCHEDULER, 
@@ -135,7 +135,7 @@ public class TestDistributedShell {
         } catch (Exception e) {
           throw new RuntimeException(e);
         }
-      };
+      }
     };
     t.start();
 
@@ -248,5 +248,34 @@ public class TestDistributedShell {
       Thread.sleep(2000);
     }
   }
+
+  @Test(timeout=90000)
+  public void testContainerLaunchFailureHandling() throws Exception {
+    String[] args = {
+      "--jar",
+      APPMASTER_JAR,
+      "--num_containers",
+      "2",
+      "--shell_command",
+      Shell.WINDOWS ? "dir" : "ls",
+      "--master_memory",
+      "512",
+      "--container_memory",
+      "128"
+    };
+
+    LOG.info("Initializing DS Client");
+    Client client = new Client(ContainerLaunchFailAppMaster.class.getName(),
+      new Configuration(yarnCluster.getConfig()));
+    boolean initSuccess = client.init(args);
+    Assert.assertTrue(initSuccess);
+    LOG.info("Running DS Client");
+    boolean result = client.run();
+
+    LOG.info("Client run completed. Result=" + result);
+    Assert.assertFalse(result);
+
+  }
+
 }
 



Mime
View raw message