hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ka...@apache.org
Subject svn commit: r1613551 - in /hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src: main/java/org/apache/hadoop/yarn/sls/appmaster/ main/java/org/apache/hadoop/yarn/sls/nodemanager/ main/java/org/apache/hadoop/yarn/sls/scheduler/ test/java/org/ap...
Date Sat, 26 Jul 2014 01:59:47 GMT
Author: kasha
Date: Sat Jul 26 01:59:46 2014
New Revision: 1613551

URL: http://svn.apache.org/r1613551
Log:
YARN-1726. ResourceSchedulerWrapper broken due to AbstractYarnScheduler. (Wei Yan via kasha)

Added:
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/
      - copied from r1613550, hadoop/common/branches/branch-2/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/nodemanager/
      - copied from r1613550, hadoop/common/branches/branch-2/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/nodemanager/
Modified:
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java
    hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java	Sat Jul 26 01:59:46 2014
@@ -63,6 +63,8 @@ import org.apache.hadoop.yarn.security.A
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.log4j.Logger;
 
@@ -133,8 +135,7 @@ public abstract class AMSimulator extend
    * register with RM
    */
   @Override
-  public void firstStep()
-          throws YarnException, IOException, InterruptedException {
+  public void firstStep() throws Exception {
     simulateStartTimeMS = System.currentTimeMillis() - 
                           SLSRunner.getRunner().getStartTimeMS();
 
@@ -149,8 +150,7 @@ public abstract class AMSimulator extend
   }
 
   @Override
-  public void middleStep()
-          throws InterruptedException, YarnException, IOException {
+  public void middleStep() throws Exception {
     // process responses in the queue
     processResponseQueue();
     
@@ -162,7 +162,7 @@ public abstract class AMSimulator extend
   }
 
   @Override
-  public void lastStep() {
+  public void lastStep() throws Exception {
     LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
     // unregister tracking
     if (isTracked) {
@@ -173,26 +173,19 @@ public abstract class AMSimulator extend
                   .newRecordInstance(FinishApplicationMasterRequest.class);
     finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
 
-    try {
-      UserGroupInformation ugi =
-              UserGroupInformation.createRemoteUser(appAttemptId.toString());
-      Token<AMRMTokenIdentifier> token =
-              rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
-                .getRMAppAttempt(appAttemptId).getAMRMToken();
-      ugi.addTokenIdentifier(token.decodeIdentifier());
-      ugi.doAs(new PrivilegedExceptionAction<Object>() {
-        @Override
-        public Object run() throws Exception {
-          rm.getApplicationMasterService()
-                  .finishApplicationMaster(finishAMRequest);
-          return null;
-        }
-      });
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (InterruptedException e) {
-      e.printStackTrace();
-    }
+    UserGroupInformation ugi =
+        UserGroupInformation.createRemoteUser(appAttemptId.toString());
+    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps().get(appId)
+        .getRMAppAttempt(appAttemptId).getAMRMToken();
+    ugi.addTokenIdentifier(token.decodeIdentifier());
+    ugi.doAs(new PrivilegedExceptionAction<Object>() {
+      @Override
+      public Object run() throws Exception {
+        rm.getApplicationMasterService()
+            .finishApplicationMaster(finishAMRequest);
+        return null;
+      }
+    });
 
     simulateFinishTimeMS = System.currentTimeMillis() -
         SLSRunner.getRunner().getStartTimeMS();
@@ -230,11 +223,9 @@ public abstract class AMSimulator extend
     return createAllocateRequest(ask, new ArrayList<ContainerId>());
   }
 
-  protected abstract void processResponseQueue()
-          throws InterruptedException, YarnException, IOException;
+  protected abstract void processResponseQueue() throws Exception;
   
-  protected abstract void sendContainerRequest()
-          throws YarnException, IOException, InterruptedException;
+  protected abstract void sendContainerRequest() throws Exception;
   
   protected abstract void checkStop();
   
@@ -280,11 +271,18 @@ public abstract class AMSimulator extend
     // waiting until application ACCEPTED
     RMApp app = rm.getRMContext().getRMApps().get(appId);
     while(app.getState() != RMAppState.ACCEPTED) {
-      Thread.sleep(50);
+      Thread.sleep(10);
     }
 
-    appAttemptId = rm.getRMContext().getRMApps().get(appId)
-            .getCurrentAppAttempt().getAppAttemptId();
+    // Waiting until application attempt reach LAUNCHED
+    // "Unmanaged AM must register after AM attempt reaches LAUNCHED state"
+    this.appAttemptId = rm.getRMContext().getRMApps().get(appId)
+        .getCurrentAppAttempt().getAppAttemptId();
+    RMAppAttempt rmAppAttempt = rm.getRMContext().getRMApps().get(appId)
+        .getCurrentAppAttempt();
+    while (rmAppAttempt.getAppAttemptState() != RMAppAttemptState.LAUNCHED) {
+      Thread.sleep(10);
+    }
   }
 
   private void registerAM()
@@ -297,10 +295,9 @@ public abstract class AMSimulator extend
     amRegisterRequest.setTrackingUrl("localhost:1000");
 
     UserGroupInformation ugi =
-            UserGroupInformation.createRemoteUser(appAttemptId.toString());
-    Token<AMRMTokenIdentifier> token =
-            rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
-                    .getRMAppAttempt(appAttemptId).getAMRMToken();
+        UserGroupInformation.createRemoteUser(appAttemptId.toString());
+    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps().get(appId)
+        .getRMAppAttempt(appAttemptId).getAMRMToken();
     ugi.addTokenIdentifier(token.decodeIdentifier());
 
     ugi.doAs(

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java	Sat Jul 26 01:59:46 2014
@@ -145,8 +145,7 @@ public class MRAMSimulator extends AMSim
   }
 
   @Override
-  public void firstStep()
-          throws YarnException, IOException, InterruptedException {
+  public void firstStep() throws Exception {
     super.firstStep();
     
     requestAMContainer();
@@ -390,7 +389,7 @@ public class MRAMSimulator extends AMSim
   }
 
   @Override
-  public void lastStep() {
+  public void lastStep() throws Exception {
     super.lastStep();
 
     // clear data structures

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java	Sat Jul 26 01:59:46 2014
@@ -27,6 +27,7 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.DelayQueue;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -107,12 +108,12 @@ public class NMSimulator extends TaskRun
   }
 
   @Override
-  public void firstStep() throws YarnException, IOException {
+  public void firstStep() {
     // do nothing
   }
 
   @Override
-  public void middleStep() {
+  public void middleStep() throws Exception {
     // we check the lifetime for each running containers
     ContainerSimulator cs = null;
     synchronized(completedContainerList) {
@@ -136,37 +137,31 @@ public class NMSimulator extends TaskRun
     ns.setResponseId(RESPONSE_ID ++);
     ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
     beatRequest.setNodeStatus(ns);
-    try {
-      NodeHeartbeatResponse beatResponse =
-              rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
-      if (! beatResponse.getContainersToCleanup().isEmpty()) {
-        // remove from queue
-        synchronized(releasedContainerList) {
-          for (ContainerId containerId : beatResponse.getContainersToCleanup()){
-            if (amContainerList.contains(containerId)) {
-              // AM container (not killed?, only release)
-              synchronized(amContainerList) {
-                amContainerList.remove(containerId);
-              }
-              LOG.debug(MessageFormat.format("NodeManager {0} releases " +
-                      "an AM ({1}).", node.getNodeID(), containerId));
-            } else {
-              cs = runningContainers.remove(containerId);
-              containerQueue.remove(cs);
-              releasedContainerList.add(containerId);
-              LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
-                      "container ({1}).", node.getNodeID(), containerId));
+    NodeHeartbeatResponse beatResponse =
+        rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
+    if (! beatResponse.getContainersToCleanup().isEmpty()) {
+      // remove from queue
+      synchronized(releasedContainerList) {
+        for (ContainerId containerId : beatResponse.getContainersToCleanup()){
+          if (amContainerList.contains(containerId)) {
+            // AM container (not killed?, only release)
+            synchronized(amContainerList) {
+              amContainerList.remove(containerId);
             }
+            LOG.debug(MessageFormat.format("NodeManager {0} releases " +
+                "an AM ({1}).", node.getNodeID(), containerId));
+          } else {
+            cs = runningContainers.remove(containerId);
+            containerQueue.remove(cs);
+            releasedContainerList.add(containerId);
+            LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
+                "container ({1}).", node.getNodeID(), containerId));
           }
         }
       }
-      if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
-        lastStep();
-      }
-    } catch (YarnException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
+    }
+    if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
+      lastStep();
     }
   }
 
@@ -262,4 +257,19 @@ public class NMSimulator extends TaskRun
       completedContainerList.add(containerId);
     }
   }
+
+  @VisibleForTesting
+  Map<ContainerId, ContainerSimulator> getRunningContainers() {
+    return runningContainers;
+  }
+
+  @VisibleForTesting
+  List<ContainerId> getAMContainers() {
+    return amContainerList;
+  }
+
+  @VisibleForTesting
+  List<ContainerId> getCompletedContainers() {
+    return completedContainerList;
+  }
 }

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java	Sat Jul 26 01:59:46 2014
@@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.res
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
@@ -101,7 +102,6 @@ public class ResourceSchedulerWrapper
   private static final String EOL = System.getProperty("line.separator");
   private static final int SAMPLING_SIZE = 60;
   private ScheduledExecutorService pool;
-  private RMContext rmContext;
   // counters for scheduler allocate/handle operations
   private Counter schedulerAllocateCounter;
   private Counter schedulerHandleCounter;
@@ -577,7 +577,7 @@ public class ResourceSchedulerWrapper
       new Gauge<Integer>() {
         @Override
         public Integer getValue() {
-          if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
+          if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
             return 0;
           } else {
             return scheduler.getRootQueueMetrics().getAppsRunning();
@@ -724,17 +724,18 @@ public class ResourceSchedulerWrapper
   public void addAMRuntime(ApplicationId appId,
                            long traceStartTimeMS, long traceEndTimeMS,
                            long simulateStartTimeMS, long simulateEndTimeMS) {
-
-    try {
-      // write job runtime information
-      StringBuilder sb = new StringBuilder();
-      sb.append(appId).append(",").append(traceStartTimeMS).append(",")
-              .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
-              .append(",").append(simulateEndTimeMS);
-      jobRuntimeLogBW.write(sb.toString() + EOL);
-      jobRuntimeLogBW.flush();
-    } catch (IOException e) {
-      e.printStackTrace();
+    if (metricsON) {
+      try {
+        // write job runtime information
+        StringBuilder sb = new StringBuilder();
+        sb.append(appId).append(",").append(traceStartTimeMS).append(",")
+            .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
+            .append(",").append(simulateEndTimeMS);
+        jobRuntimeLogBW.write(sb.toString() + EOL);
+        jobRuntimeLogBW.flush();
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
     }
   }
 
@@ -920,5 +921,18 @@ public class ResourceSchedulerWrapper
   public Resource getClusterResource() {
     return null;
   }
+
+  @Override
+  public synchronized List<Container> getTransferredContainers(
+      ApplicationAttemptId currentAttempt) {
+    return new ArrayList<Container>();
+  }
+
+  @Override
+  public Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>>
+      getSchedulerApplications() {
+    return new HashMap<ApplicationId,
+        SchedulerApplication<SchedulerApplicationAttempt>>();
+  }
 }
 

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java	Sat Jul 26 01:59:46 2014
@@ -99,12 +99,10 @@ public class TaskRunner {
         } else {
           lastStep();
         }
-      } catch (YarnException e) {
-        e.printStackTrace();
-      } catch (IOException e) {
-        e.printStackTrace();
-      } catch (InterruptedException e) {
+      } catch (Exception e) {
         e.printStackTrace();
+        Thread.getDefaultUncaughtExceptionHandler()
+            .uncaughtException(Thread.currentThread(), e);
       }
     }
 
@@ -124,13 +122,11 @@ public class TaskRunner {
     }
 
 
-    public abstract void firstStep()
-            throws YarnException, IOException, InterruptedException;
+    public abstract void firstStep() throws Exception;
 
-    public abstract void middleStep()
-            throws YarnException, InterruptedException, IOException;
+    public abstract void middleStep() throws Exception;
 
-    public abstract void lastStep() throws YarnException;
+    public abstract void lastStep() throws Exception;
 
     public void setEndTime(long et) {
       endTime = et;

Modified: hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java?rev=1613551&r1=1613550&r2=1613551&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java	(original)
+++ hadoop/common/branches/branch-2.5/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java	Sat Jul 26 01:59:46 2014
@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.yarn.sls;
 
-import org.apache.commons.io.FileUtils;
+import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.UUID;
 
 public class TestSLSRunner {
@@ -30,6 +33,15 @@ public class TestSLSRunner {
   @SuppressWarnings("all")
   public void testSimulatorRunning() throws Exception {
     File tempDir = new File("target", UUID.randomUUID().toString());
+    final List<Throwable> exceptionList =
+        Collections.synchronizedList(new ArrayList<Throwable>());
+
+    Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
+      @Override
+      public void uncaughtException(Thread t, Throwable e) {
+        exceptionList.add(e);
+      }
+    });
 
     // start the simulator
     File slsOutputDir = new File(tempDir.getAbsolutePath() + "/slsoutput/");
@@ -38,8 +50,20 @@ public class TestSLSRunner {
             "-output", slsOutputDir.getAbsolutePath()};
     SLSRunner.main(args);
 
-    // wait for 45 seconds before stop
-    Thread.sleep(45 * 1000);
+    // wait for 20 seconds before stop
+    int count = 20;
+    while (count >= 0) {
+      Thread.sleep(1000);
+
+      if (! exceptionList.isEmpty()) {
+        SLSRunner.getRunner().stop();
+        Assert.fail("TestSLSRunner catched exception from child thread " +
+            "(TaskRunner.Task): " + exceptionList.get(0).getMessage());
+        break;
+      }
+      count--;
+    }
+
     SLSRunner.getRunner().stop();
   }
 



Mime
View raw message