hadoop-yarn-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vino...@apache.org
Subject svn commit: r1537330 [4/12] - in /hadoop/common/branches/YARN-321/hadoop-yarn-project: ./ hadoop-yarn/bin/ hadoop-yarn/conf/ hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ hadoop-yarn/hadoop-yarn-api/src/main/java...
Date Wed, 30 Oct 2013 22:22:36 GMT
Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java Wed Oct 30 22:21:59 2013
@@ -24,6 +24,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,6 +38,10 @@ import org.apache.hadoop.yarn.event.Even
 import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext;
 import org.apache.hadoop.yarn.server.api.AuxiliaryService;
 import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
+import org.apache.hadoop.yarn.server.api.ContainerInitializationContext;
+import org.apache.hadoop.yarn.server.api.ContainerTerminationContext;
+
+import com.google.common.base.Preconditions;
 
 public class AuxServices extends AbstractService
     implements ServiceStateChangeListener, EventHandler<AuxServicesEvent> {
@@ -46,6 +51,8 @@ public class AuxServices extends Abstrac
   protected final Map<String,AuxiliaryService> serviceMap;
   protected final Map<String,ByteBuffer> serviceMetaData;
 
+  private final Pattern p = Pattern.compile("^[A-Za-z_]+[A-Za-z0-9_]*$");
+
   public AuxServices() {
     super(AuxServices.class.getName());
     serviceMap =
@@ -88,6 +95,13 @@ public class AuxServices extends Abstrac
         YarnConfiguration.NM_AUX_SERVICES);
     for (final String sName : auxNames) {
       try {
+        Preconditions
+            .checkArgument(
+                validateAuxServiceName(sName),
+                "The ServiceName: " + sName + " set in " +
+                YarnConfiguration.NM_AUX_SERVICES +" is invalid." +
+                "The valid service name should only contain a-zA-Z0-9_ " +
+                "and can not start with numbers");
         Class<? extends AuxiliaryService> sClass = conf.getClass(
               String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, sName), null,
               AuxiliaryService.class);
@@ -161,26 +175,70 @@ public class AuxServices extends Abstrac
     LOG.info("Got event " + event.getType() + " for appId "
         + event.getApplicationID());
     switch (event.getType()) {
-    case APPLICATION_INIT:
-      LOG.info("Got APPLICATION_INIT for service " + event.getServiceID());
-      AuxiliaryService service = serviceMap.get(event.getServiceID());
-      if (null == service) {
-        LOG.info("service is null");
-        // TODO kill all containers waiting on Application
-        return;
-      }
-      service.initializeApplication(new ApplicationInitializationContext(event
-        .getUser(), event.getApplicationID(), event.getServiceData()));
-      break;
-    case APPLICATION_STOP:
-      for (AuxiliaryService serv : serviceMap.values()) {
-        serv.stopApplication(new ApplicationTerminationContext(event
-          .getApplicationID()));
-      }
-      break;
-    default:
-      throw new RuntimeException("Unknown type: " + event.getType());
+      case APPLICATION_INIT:
+        LOG.info("Got APPLICATION_INIT for service " + event.getServiceID());
+        AuxiliaryService service = null;
+        try {
+          service = serviceMap.get(event.getServiceID());
+          service
+              .initializeApplication(new ApplicationInitializationContext(event
+                  .getUser(), event.getApplicationID(), event.getServiceData()));
+        } catch (Throwable th) {
+          logWarningWhenAuxServiceThrowExceptions(service,
+              AuxServicesEventType.APPLICATION_INIT, th);
+        }
+        break;
+      case APPLICATION_STOP:
+        for (AuxiliaryService serv : serviceMap.values()) {
+          try {
+            serv.stopApplication(new ApplicationTerminationContext(event
+                .getApplicationID()));
+          } catch (Throwable th) {
+            logWarningWhenAuxServiceThrowExceptions(serv,
+                AuxServicesEventType.APPLICATION_STOP, th);
+          }
+        }
+        break;
+      case CONTAINER_INIT:
+        for (AuxiliaryService serv : serviceMap.values()) {
+          try {
+            serv.initializeContainer(new ContainerInitializationContext(
+                event.getUser(), event.getContainer().getContainerId(),
+                event.getContainer().getResource()));
+          } catch (Throwable th) {
+            logWarningWhenAuxServiceThrowExceptions(serv,
+                AuxServicesEventType.CONTAINER_INIT, th);
+          }
+        }
+        break;
+      case CONTAINER_STOP:
+        for (AuxiliaryService serv : serviceMap.values()) {
+          try {
+            serv.stopContainer(new ContainerTerminationContext(
+                event.getUser(), event.getContainer().getContainerId(),
+                event.getContainer().getResource()));
+          } catch (Throwable th) {
+            logWarningWhenAuxServiceThrowExceptions(serv,
+                AuxServicesEventType.CONTAINER_STOP, th);
+          }
+        }
+        break;
+      default:
+        throw new RuntimeException("Unknown type: " + event.getType());
     }
   }
 
+  private boolean validateAuxServiceName(String name) {
+    if (name == null || name.trim().isEmpty()) {
+      return false;
+    }
+    return p.matcher(name).matches();
+  }
+
+  private void logWarningWhenAuxServiceThrowExceptions(AuxiliaryService service,
+      AuxServicesEventType eventType, Throwable th) {
+    LOG.warn((null == service ? "The auxService is null"
+        : "The auxService name is " + service.getName())
+        + " and it got an error at event: " + eventType, th);
+  }
 }

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEvent.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEvent.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEvent.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEvent.java Wed Oct 30 22:21:59 2013
@@ -21,7 +21,10 @@ package org.apache.hadoop.yarn.server.no
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.event.AbstractEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container
+    .Container;
 
 public class AuxServicesEvent extends AbstractEvent<AuxServicesEventType> {
 
@@ -29,18 +32,30 @@ public class AuxServicesEvent extends Ab
   private final String serviceId;
   private final ByteBuffer serviceData;
   private final ApplicationId appId;
+  private final Container container;
 
   public AuxServicesEvent(AuxServicesEventType eventType, ApplicationId appId) {
     this(eventType, null, appId, null, null);
   }
 
+  public AuxServicesEvent(AuxServicesEventType eventType, Container container) {
+    this(eventType, null, container.getContainerId().getApplicationAttemptId()
+        .getApplicationId(), null, null, container);
+  }
+
   public AuxServicesEvent(AuxServicesEventType eventType, String user,
       ApplicationId appId, String serviceId, ByteBuffer serviceData) {
+    this(eventType, user, appId, serviceId, serviceData, null);
+  }
+    public AuxServicesEvent(AuxServicesEventType eventType, String user,
+      ApplicationId appId, String serviceId, ByteBuffer serviceData,
+        Container container) {
     super(eventType);
     this.user = user;
     this.appId = appId;
     this.serviceId = serviceId;
     this.serviceData = serviceData;
+    this.container = container;
   }
 
   public String getServiceID() {
@@ -59,4 +74,8 @@ public class AuxServicesEvent extends Ab
     return appId;
   }
 
+  public Container getContainer() {
+    return container;
+  }
+
 }

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEventType.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEventType.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEventType.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServicesEventType.java Wed Oct 30 22:21:59 2013
@@ -20,5 +20,7 @@ package org.apache.hadoop.yarn.server.no
 
 public enum AuxServicesEventType {
   APPLICATION_INIT,
-  APPLICATION_STOP
+  APPLICATION_STOP,
+  CONTAINER_INIT,
+  CONTAINER_STOP
 }

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java Wed Oct 30 22:21:59 2013
@@ -30,6 +30,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -67,9 +70,11 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.InvalidAuxServiceException;
 import org.apache.hadoop.yarn.exceptions.InvalidContainerException;
 import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
@@ -119,6 +124,11 @@ public class ContainerManagerImpl extend
     ServiceStateChangeListener, ContainerManagementProtocol,
     EventHandler<ContainerManagerEvent> {
 
+  /**
+   * Extra duration to wait for applications to be killed on shutdown.
+   */
+  private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000;
+
   private static final Log LOG = LogFactory.getLog(ContainerManagerImpl.class);
 
   final Context context;
@@ -137,6 +147,11 @@ public class ContainerManagerImpl extend
 
   private final DeletionService deletionService;
   private AtomicBoolean blockNewContainerRequests = new AtomicBoolean(false);
+  private boolean serviceStopped = false;
+  private final ReadLock readLock;
+  private final WriteLock writeLock;
+
+  private long waitForContainersOnShutdownMillis;
 
   public ContainerManagerImpl(Context context, ContainerExecutor exec,
       DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
@@ -180,6 +195,10 @@ public class ContainerManagerImpl extend
     dispatcher.register(ContainersLauncherEventType.class, containersLauncher);
     
     addService(dispatcher);
+
+    ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+    this.readLock = lock.readLock();
+    this.writeLock = lock.writeLock();
   }
 
   @Override
@@ -189,6 +208,13 @@ public class ContainerManagerImpl extend
     addIfService(logHandler);
     dispatcher.register(LogHandlerEventType.class, logHandler);
     
+    waitForContainersOnShutdownMillis =
+        conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
+            YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) +
+        conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS,
+            YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) +
+        SHUTDOWN_CLEANUP_SLOP_MS;
+
     super.serviceInit(conf);
   }
 
@@ -216,7 +242,7 @@ public class ContainerManagerImpl extend
 
   protected ContainersLauncher createContainersLauncher(Context context,
       ContainerExecutor exec) {
-    return new ContainersLauncher(context, this.dispatcher, exec, dirsHandler);
+    return new ContainersLauncher(context, this.dispatcher, exec, dirsHandler, this);
   }
 
   @Override
@@ -274,6 +300,16 @@ public class ContainerManagerImpl extend
 
   @Override
   public void serviceStop() throws Exception {
+    setBlockNewContainerRequests(true);
+    this.writeLock.lock();
+    try {
+      serviceStopped = true;
+      if (context != null) {
+        cleanUpApplicationsOnNMShutDown();
+      }
+    } finally {
+      this.writeLock.unlock();
+    }
     if (auxiliaryServices.getServiceState() == STARTED) {
       auxiliaryServices.unregisterServiceListener(this);
     }
@@ -283,6 +319,76 @@ public class ContainerManagerImpl extend
     super.serviceStop();
   }
 
+  public void cleanUpApplicationsOnNMShutDown() {
+    Map<ApplicationId, Application> applications =
+        this.context.getApplications();
+    if (applications.isEmpty()) {
+      return;
+    }
+    LOG.info("Applications still running : " + applications.keySet());
+
+    List<ApplicationId> appIds =
+        new ArrayList<ApplicationId>(applications.keySet());
+    this.handle(
+        new CMgrCompletedAppsEvent(appIds,
+            CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
+
+    LOG.info("Waiting for Applications to be Finished");
+
+    long waitStartTime = System.currentTimeMillis();
+    while (!applications.isEmpty()
+        && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException ex) {
+        LOG.warn(
+          "Interrupted while sleeping on applications finish on shutdown", ex);
+      }
+    }
+
+    // All applications Finished
+    if (applications.isEmpty()) {
+      LOG.info("All applications in FINISHED state");
+    } else {
+      LOG.info("Done waiting for Applications to be Finished. Still alive: " +
+          applications.keySet());
+    }
+  }
+
+  public void cleanupContainersOnNMResync() {
+    Map<ContainerId, Container> containers = context.getContainers();
+    if (containers.isEmpty()) {
+      return;
+    }
+    LOG.info("Containers still running on "
+        + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC + " : "
+        + containers.keySet());
+
+    List<ContainerId> containerIds =
+      new ArrayList<ContainerId>(containers.keySet());
+
+    LOG.info("Waiting for containers to be killed");
+
+    this.handle(new CMgrCompletedContainersEvent(containerIds,
+      CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC));
+    while (!containers.isEmpty()) {
+      try {
+        Thread.sleep(1000);
+        nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext();
+      } catch (InterruptedException ex) {
+        LOG.warn("Interrupted while sleeping on container kill on resync", ex);
+      }
+    }
+
+    // All containers killed
+    if (containers.isEmpty()) {
+      LOG.info("All containers in DONE state");
+    } else {
+      LOG.info("Done waiting for containers to be killed. Still alive: " +
+        containers.keySet());
+    }
+  }
+
   // Get the remoteUGI corresponding to the api call.
   protected UserGroupInformation getRemoteUgi()
       throws YarnException {
@@ -410,7 +516,7 @@ public class ContainerManagerImpl extend
       }
     }
 
-    return StartContainersResponse.newInstance(auxiliaryServices.getMetaData(),
+    return StartContainersResponse.newInstance(getAuxServiceMetaData(),
       succeededContainers, failedContainers);
   }
 
@@ -451,6 +557,18 @@ public class ContainerManagerImpl extend
 
     ContainerLaunchContext launchContext = request.getContainerLaunchContext();
 
+    Map<String, ByteBuffer> serviceData = getAuxServiceMetaData();
+    if (launchContext.getServiceData()!=null && 
+        !launchContext.getServiceData().isEmpty()) {
+      for (Map.Entry<String, ByteBuffer> meta : launchContext.getServiceData()
+          .entrySet()) {
+        if (null == serviceData.get(meta.getKey())) {
+          throw new InvalidAuxServiceException("The auxService:" + meta.getKey()
+              + " does not exist");
+        }
+      }
+    }
+
     Credentials credentials = parseCredentials(launchContext);
 
     Container container =
@@ -466,29 +584,40 @@ public class ContainerManagerImpl extend
           + " already is running on this node!!");
     }
 
-    // Create the application
-    Application application =
-        new ApplicationImpl(dispatcher, user, applicationID, credentials, context);
-    if (null == context.getApplications().putIfAbsent(applicationID,
-      application)) {
-      LOG.info("Creating a new application reference for app " + applicationID);
-
-      dispatcher.getEventHandler().handle(
-        new ApplicationInitEvent(applicationID, container.getLaunchContext()
-          .getApplicationACLs()));
-    }
+    this.readLock.lock();
+    try {
+      if (!serviceStopped) {
+        // Create the application
+        Application application =
+            new ApplicationImpl(dispatcher, user, applicationID, credentials, context);
+        if (null == context.getApplications().putIfAbsent(applicationID,
+          application)) {
+          LOG.info("Creating a new application reference for app " + applicationID);
+
+          dispatcher.getEventHandler().handle(
+            new ApplicationInitEvent(applicationID, container.getLaunchContext()
+              .getApplicationACLs()));
+        }
 
-    dispatcher.getEventHandler().handle(
-      new ApplicationContainerInitEvent(container));
+        dispatcher.getEventHandler().handle(
+          new ApplicationContainerInitEvent(container));
 
-    this.context.getContainerTokenSecretManager().startContainerSuccessful(
-      containerTokenIdentifier);
-    NMAuditLogger.logSuccess(user, AuditConstants.START_CONTAINER,
-      "ContainerManageImpl", applicationID, containerId);
-    // TODO launchedContainer misplaced -> doesn't necessarily mean a container
-    // launch. A finished Application will not launch containers.
-    metrics.launchedContainer();
-    metrics.allocateContainer(containerTokenIdentifier.getResource()); 
+        this.context.getContainerTokenSecretManager().startContainerSuccessful(
+          containerTokenIdentifier);
+        NMAuditLogger.logSuccess(user, AuditConstants.START_CONTAINER,
+          "ContainerManageImpl", applicationID, containerId);
+        // TODO launchedContainer misplaced -> doesn't necessarily mean a container
+        // launch. A finished Application will not launch containers.
+        metrics.launchedContainer();
+        metrics.allocateContainer(containerTokenIdentifier.getResource());
+      } else {
+        throw new YarnException(
+            "Container start failed as the NodeManager is " +
+            "in the process of shutting down");
+      }
+    } finally {
+      this.readLock.unlock();
+    }
   }
 
   protected ContainerTokenIdentifier verifyAndGetContainerTokenIdentifier(
@@ -713,9 +842,15 @@ public class ContainerManagerImpl extend
       CMgrCompletedAppsEvent appsFinishedEvent =
           (CMgrCompletedAppsEvent) event;
       for (ApplicationId appID : appsFinishedEvent.getAppsToCleanup()) {
+        String diagnostic = "";
+        if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) {
+          diagnostic = "Application killed on shutdown";
+        } else if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
+          diagnostic = "Application killed by ResourceManager";
+        }
         this.dispatcher.getEventHandler().handle(
             new ApplicationFinishEvent(appID,
-                "Application Killed by ResourceManager"));
+                diagnostic));
       }
       break;
     case FINISH_CONTAINERS:
@@ -723,20 +858,14 @@ public class ContainerManagerImpl extend
           (CMgrCompletedContainersEvent) event;
       for (ContainerId container : containersFinishedEvent
           .getContainersToCleanup()) {
-        String diagnostic = "";
-        if (containersFinishedEvent.getReason() == 
-            CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) {
-          diagnostic = "Container Killed on Shutdown";
-        } else if (containersFinishedEvent.getReason() == 
-            CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) {
-          diagnostic = "Container Killed by ResourceManager";
-        }
-        this.dispatcher.getEventHandler().handle(
-            new ContainerKillEvent(container, diagnostic));
+          this.dispatcher.getEventHandler().handle(
+              new ContainerKillEvent(container,
+                  "Container Killed by ResourceManager"));
       }
       break;
     default:
-      LOG.warn("Invalid event " + event.getType() + ". Ignoring.");
+        throw new YarnRuntimeException(
+            "Got an unknown ContainerManagerEvent type: " + event.getType());
     }
   }
 
@@ -759,4 +888,7 @@ public class ContainerManagerImpl extend
     return this.context;
   }
 
+  public Map<String, ByteBuffer> getAuxServiceMetaData() {
+    return this.auxiliaryServices.getMetaData();
+  }
 }

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java Wed Oct 30 22:21:59 2013
@@ -177,6 +177,13 @@ public class ApplicationImpl implements 
                    ApplicationState.APPLICATION_RESOURCES_CLEANINGUP),
                ApplicationEventType.APPLICATION_CONTAINER_FINISHED,
                new AppFinishTransition())
+          .addTransition(ApplicationState.FINISHING_CONTAINERS_WAIT,
+              ApplicationState.FINISHING_CONTAINERS_WAIT,
+              EnumSet.of(
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_INITED,
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED,
+                  ApplicationEventType.APPLICATION_INITED,
+                  ApplicationEventType.FINISH_APPLICATION))
 
            // Transitions from APPLICATION_RESOURCES_CLEANINGUP state
            .addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP,
@@ -186,12 +193,25 @@ public class ApplicationImpl implements 
                ApplicationState.FINISHED,
                ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP,
                new AppCompletelyDoneTransition())
+          .addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP,
+              ApplicationState.APPLICATION_RESOURCES_CLEANINGUP,
+              EnumSet.of(
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_INITED,
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED,
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED,
+                  ApplicationEventType.APPLICATION_INITED,
+                  ApplicationEventType.FINISH_APPLICATION))
            
            // Transitions from FINISHED state
            .addTransition(ApplicationState.FINISHED,
                ApplicationState.FINISHED,
                ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED,
                new AppLogsAggregatedTransition())
+           .addTransition(ApplicationState.FINISHED, ApplicationState.FINISHED,
+               EnumSet.of(
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_INITED,
+                  ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED,
+                  ApplicationEventType.FINISH_APPLICATION))
                
            // create the topology tables
            .installTopology();
@@ -343,7 +363,7 @@ public class ApplicationImpl implements 
     @Override
     public ApplicationState transition(ApplicationImpl app,
         ApplicationEvent event) {
-
+      ApplicationFinishEvent appEvent = (ApplicationFinishEvent)event;
       if (app.containers.isEmpty()) {
         // No container to cleanup. Cleanup app level resources.
         app.handleAppFinishWithContainersCleanedup();
@@ -355,7 +375,7 @@ public class ApplicationImpl implements 
       for (ContainerId containerID : app.containers.keySet()) {
         app.dispatcher.getEventHandler().handle(
             new ContainerKillEvent(containerID,
-                "Container killed on application-finish event from RM."));
+                "Container killed on application-finish event: " + appEvent.getDiagnostic()));
       }
       return ApplicationState.FINISHING_CONTAINERS_WAIT;
     }
@@ -395,6 +415,7 @@ public class ApplicationImpl implements 
       app.dispatcher.getEventHandler().handle(
           new LogHandlerAppFinishedEvent(app.appId));
 
+      app.context.getNMTokenSecretManager().appFinished(app.getAppId());
     }
   }
 

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java Wed Oct 30 22:21:59 2013
@@ -503,6 +503,9 @@ public class ContainerImpl implements Co
       final ContainerLaunchContext ctxt = container.launchContext;
       container.metrics.initingContainer();
 
+      container.dispatcher.getEventHandler().handle(new AuxServicesEvent
+          (AuxServicesEventType.CONTAINER_INIT, container));
+
       // Inform the AuxServices about the opaque serviceData
       Map<String,ByteBuffer> csd = ctxt.getServiceData();
       if (csd != null) {
@@ -820,8 +823,16 @@ public class ContainerImpl implements Co
   static class ContainerDoneTransition implements
       SingleArcTransition<ContainerImpl, ContainerEvent> {
     @Override
+    @SuppressWarnings("unchecked")
     public void transition(ContainerImpl container, ContainerEvent event) {
       container.finished();
+      //if the current state is NEW it means the CONTAINER_INIT was never 
+      // sent for the event, thus no need to send the CONTAINER_STOP
+      if (container.getCurrentState() 
+          != org.apache.hadoop.yarn.api.records.ContainerState.NEW) {
+        container.dispatcher.getEventHandler().handle(new AuxServicesEvent
+            (AuxServicesEventType.CONTAINER_STOP, container));
+      }
     }
   }
 

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java Wed Oct 30 22:21:59 2013
@@ -26,6 +26,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -60,16 +61,19 @@ import org.apache.hadoop.yarn.server.nod
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
 import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader;
 import org.apache.hadoop.yarn.util.Apps;
+import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 
 public class ContainerLaunch implements Callable<Integer> {
@@ -88,6 +92,7 @@ public class ContainerLaunch implements 
   private final Container container;
   private final Configuration conf;
   private final Context context;
+  private final ContainerManagerImpl containerManager;
   
   private volatile AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false);
   private volatile AtomicBoolean completed = new AtomicBoolean(false);
@@ -101,7 +106,8 @@ public class ContainerLaunch implements 
 
   public ContainerLaunch(Context context, Configuration configuration,
       Dispatcher dispatcher, ContainerExecutor exec, Application app,
-      Container container, LocalDirsHandlerService dirsHandler) {
+      Container container, LocalDirsHandlerService dirsHandler,
+      ContainerManagerImpl containerManager) {
     this.context = context;
     this.conf = configuration;
     this.app = app;
@@ -109,6 +115,7 @@ public class ContainerLaunch implements 
     this.container = container;
     this.dispatcher = dispatcher;
     this.dirsHandler = dirsHandler;
+    this.containerManager = containerManager;
     this.sleepDelayBeforeSigKill =
         conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
             YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS);
@@ -127,10 +134,22 @@ public class ContainerLaunch implements 
     final List<String> command = launchContext.getCommands();
     int ret = -1;
 
+    // CONTAINER_KILLED_ON_REQUEST should not be missed if the container
+    // is already at KILLING
+    if (container.getContainerState() == ContainerState.KILLING) {
+      dispatcher.getEventHandler().handle(
+          new ContainerExitEvent(containerID,
+              ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
+              Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() :
+                  ExitCode.TERMINATED.getExitCode(),
+              "Container terminated before launch."));
+      return 0;
+    }
+
     try {
       localResources = container.getLocalizedResources();
       if (localResources == null) {
-        RPCUtil.getRemoteException(
+        throw RPCUtil.getRemoteException(
             "Unable to get local resources when Container " + containerID +
             " is at " + container.getContainerState());
       }
@@ -227,7 +246,6 @@ public class ContainerLaunch implements 
             ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, 
             new Path(containerWorkDir, 
                 FINAL_CONTAINER_TOKENS_FILE).toUri().getPath());
-
         // Sanitize the container's environment
         sanitizeEnv(environment, containerWorkDir, appDirs, containerLogDirs,
           localResources);
@@ -680,6 +698,12 @@ public class ContainerLaunch implements 
         environment.put(Environment.CLASSPATH.name(), classPathJar);
       }
     }
+    // put AuxiliaryService data to environment
+    for (Map.Entry<String, ByteBuffer> meta : containerManager
+        .getAuxServiceMetaData().entrySet()) {
+      AuxiliaryServiceHelper.setServiceDataIntoEnv(
+          meta.getKey(), meta.getValue(), environment);
+    }
   }
     
   static void writeLaunchEnv(OutputStream out,

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java Wed Oct 30 22:21:59 2013
@@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.server.nod
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
@@ -65,6 +66,7 @@ public class ContainersLauncher extends 
   private final Context context;
   private final ContainerExecutor exec;
   private final Dispatcher dispatcher;
+  private final ContainerManagerImpl containerManager;
 
   private LocalDirsHandlerService dirsHandler;
   @VisibleForTesting
@@ -73,28 +75,19 @@ public class ContainersLauncher extends 
         new ThreadFactoryBuilder()
           .setNameFormat("ContainersLauncher #%d")
           .build());
-  private final Map<ContainerId,RunningContainer> running =
-    Collections.synchronizedMap(new HashMap<ContainerId,RunningContainer>());
-
-  private static final class RunningContainer {
-    public RunningContainer(Future<Integer> submit,
-        ContainerLaunch launcher) {
-      this.runningcontainer = submit;
-      this.launcher = launcher;
-    }
-
-    Future<Integer> runningcontainer;
-    ContainerLaunch launcher;
-  }
-
+  @VisibleForTesting
+  public final Map<ContainerId, ContainerLaunch> running =
+    Collections.synchronizedMap(new HashMap<ContainerId, ContainerLaunch>());
 
   public ContainersLauncher(Context context, Dispatcher dispatcher,
-      ContainerExecutor exec, LocalDirsHandlerService dirsHandler) {
+      ContainerExecutor exec, LocalDirsHandlerService dirsHandler,
+      ContainerManagerImpl containerManager) {
     super("containers-launcher");
     this.exec = exec;
     this.context = context;
     this.dispatcher = dispatcher;
     this.dirsHandler = dirsHandler;
+    this.containerManager = containerManager;
   }
 
   @Override
@@ -128,39 +121,21 @@ public class ContainersLauncher extends 
 
         ContainerLaunch launch =
             new ContainerLaunch(context, getConfig(), dispatcher, exec, app,
-              event.getContainer(), dirsHandler);
-        running.put(containerId,
-            new RunningContainer(containerLauncher.submit(launch), 
-                launch));
+              event.getContainer(), dirsHandler, containerManager);
+        containerLauncher.submit(launch);
+        running.put(containerId, launch);
         break;
       case CLEANUP_CONTAINER:
-        RunningContainer rContainerDatum = running.remove(containerId);
-        if (rContainerDatum == null) {
+        ContainerLaunch launcher = running.remove(containerId);
+        if (launcher == null) {
           // Container not launched. So nothing needs to be done.
           return;
         }
-        Future<Integer> rContainer = rContainerDatum.runningcontainer;
-        if (rContainer != null 
-            && !rContainer.isDone()) {
-          // Cancel the future so that it won't be launched if it isn't already.
-          // If it is going to be canceled, make sure CONTAINER_KILLED_ON_REQUEST
-          // will not be missed if the container is already at KILLING
-          if (rContainer.cancel(false)) {
-            if (container.getContainerState() == ContainerState.KILLING) {
-              dispatcher.getEventHandler().handle(
-                  new ContainerExitEvent(containerId,
-                      ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
-                      Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : 
-                        ExitCode.TERMINATED.getExitCode(),
-                      "Container terminated before launch."));
-            }
-          }
-        }
 
         // Cleanup a container whether it is running/killed/completed, so that
         // no sub-processes are alive.
         try {
-          rContainerDatum.launcher.cleanupContainer();
+          launcher.cleanupContainer();
         } catch (IOException e) {
           LOG.warn("Got exception while cleaning container " + containerId
               + ". Ignoring.");

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java Wed Oct 30 22:21:59 2013
@@ -65,7 +65,6 @@ import org.apache.hadoop.yarn.server.nod
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
-import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.FSDownload;
 
@@ -130,9 +129,12 @@ public class ContainerLocalizer {
     try {
       // assume credentials in cwd
       // TODO: Fix
-      credFile = lfs.open(
-          new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId)));
+      Path tokenPath =
+          new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId));
+      credFile = lfs.open(tokenPath);
       creds.readTokenStorageStream(credFile);
+      // Explicitly deleting token file.
+      lfs.delete(tokenPath, false);      
     } finally  {
       if (credFile != null) {
         credFile.close();

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java Wed Oct 30 22:21:59 2013
@@ -1017,6 +1017,7 @@ public class ResourceLocalizationService
           }
         }
         if (UserGroupInformation.isSecurityEnabled()) {
+          credentials = new Credentials(credentials);
           LocalizerTokenIdentifier id = secretManager.createIdentifier();
           Token<LocalizerTokenIdentifier> localizerToken =
               new Token<LocalizerTokenIdentifier>(id, secretManager);

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java Wed Oct 30 22:21:59 2013
@@ -32,6 +32,7 @@ import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +41,8 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
 
 public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
 
@@ -59,8 +62,13 @@ public class CgroupsLCEResourcesHandler 
   private final int CPU_DEFAULT_WEIGHT = 1024; // set by kernel
   private final Map<String, String> controllerPaths; // Controller -> path
 
+  private long deleteCgroupTimeout;
+  // package private for testing purposes
+  Clock clock;
+  
   public CgroupsLCEResourcesHandler() {
     this.controllerPaths = new HashMap<String, String>();
+    clock = new SystemClock();
   }
 
   @Override
@@ -73,7 +81,8 @@ public class CgroupsLCEResourcesHandler 
     return conf;
   }
 
-  public synchronized void init(LinuxContainerExecutor lce) throws IOException {
+  @VisibleForTesting
+  void initConfig() throws IOException {
 
     this.cgroupPrefix = conf.get(YarnConfiguration.
             NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn");
@@ -82,6 +91,9 @@ public class CgroupsLCEResourcesHandler 
     this.cgroupMountPath = conf.get(YarnConfiguration.
             NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null);
 
+    this.deleteCgroupTimeout = conf.getLong(
+        YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT,
+        YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT);
     // remove extra /'s at end or start of cgroupPrefix
     if (cgroupPrefix.charAt(0) == '/') {
       cgroupPrefix = cgroupPrefix.substring(1);
@@ -91,7 +103,11 @@ public class CgroupsLCEResourcesHandler 
     if (cgroupPrefix.charAt(len - 1) == '/') {
       cgroupPrefix = cgroupPrefix.substring(0, len - 1);
     }
+  }
   
+  public void init(LinuxContainerExecutor lce) throws IOException {
+    initConfig();
+    
     // mount cgroups if requested
     if (cgroupMount && cgroupMountPath != null) {
       ArrayList<String> cgroupKVs = new ArrayList<String>();
@@ -158,14 +174,32 @@ public class CgroupsLCEResourcesHandler 
     }
   }
 
-  private void deleteCgroup(String controller, String groupName) {
-    String path = pathForCgroup(controller, groupName);
+  @VisibleForTesting
+  boolean deleteCgroup(String cgroupPath) {
+    boolean deleted;
+    
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("deleteCgroup: " + cgroupPath);
+    }
 
-    LOG.debug("deleteCgroup: " + path);
+    long start = clock.getTime();
+    do {
+      deleted = new File(cgroupPath).delete();
+      if (!deleted) {
+        try {
+          Thread.sleep(20);
+        } catch (InterruptedException ex) {
+          // NOP        
+        }
+      }
+    } while (!deleted && (clock.getTime() - start) < deleteCgroupTimeout);
 
-    if (! new File(path).delete()) {
-      LOG.warn("Unable to delete cgroup at: " + path);
+    if (!deleted) {
+      LOG.warn("Unable to delete cgroup at: " + cgroupPath +
+          ", tried to delete for " + deleteCgroupTimeout + "ms");
     }
+
+    return deleted;
   }
 
   /*
@@ -185,21 +219,8 @@ public class CgroupsLCEResourcesHandler 
   }
 
   private void clearLimits(ContainerId containerId) {
-    String containerName = containerId.toString();
-
-    // Based on testing, ApplicationMaster executables don't terminate until
-    // a little after the container appears to have finished. Therefore, we
-    // wait a short bit for the cgroup to become empty before deleting it.
-    if (containerId.getId() == 1) {
-      try {
-        Thread.sleep(500);
-      } catch (InterruptedException e) {
-        // not a problem, continue anyway
-      }
-    }
-
     if (isCpuWeightEnabled()) {
-      deleteCgroup(CONTROLLER_CPU, containerName);
+      deleteCgroup(pathForCgroup(CONTROLLER_CPU, containerId.toString()));
     }
   }
 

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NavBlock.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NavBlock.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NavBlock.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NavBlock.java Wed Oct 30 22:21:59 2013
@@ -19,8 +19,9 @@
 package org.apache.hadoop.yarn.server.nodemanager.webapp;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.yarn.webapp.YarnWebParams;
+import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
 
 import com.google.inject.Inject;
@@ -37,8 +38,9 @@ public class NavBlock extends HtmlBlock 
   @Override
   protected void render(Block html) {
 	
-	String RMWebAppURL = YarnConfiguration.getRMWebAppURL(this.conf);
-	html
+    String RMWebAppURL =
+        WebAppUtils.getResolvedRMWebAppURLWithScheme(this.conf);
+	  html
       .div("#nav")
       .h3()._("ResourceManager")._()
         .ul()

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java Wed Oct 30 22:21:59 2013
@@ -22,7 +22,7 @@ import static org.apache.hadoop.yarn.uti
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.webapp.Gen
 import org.apache.hadoop.yarn.webapp.WebApp;
 import org.apache.hadoop.yarn.webapp.WebApps;
 import org.apache.hadoop.yarn.webapp.YarnWebParams;
+import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
 
 public class WebServer extends AbstractService {
 
@@ -54,8 +55,8 @@ public class WebServer extends AbstractS
 
   @Override
   protected void serviceStart() throws Exception {
-    String bindAddress = getConfig().get(YarnConfiguration.NM_WEBAPP_ADDRESS,
-        YarnConfiguration.DEFAULT_NM_WEBAPP_ADDRESS);
+    String bindAddress = WebAppUtils.getNMWebAppURLWithoutScheme(getConfig());
+    
     LOG.info("Instantiating NMWebApp at " + bindAddress);
     try {
       this.webApp =

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c Wed Oct 30 22:21:59 2013
@@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 
@@ -406,7 +407,7 @@ static int create_container_directories(
     const char *container_id, char* const* local_dir, char* const* log_dir, const char *work_dir) {
   // create dirs as 0750
   const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
-  if (app_id == NULL || container_id == NULL || user == NULL) {
+  if (app_id == NULL || container_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) {
     fprintf(LOGFILE, 
             "Either app_id, container_id or the user passed is null.\n");
     return -1;
@@ -492,6 +493,21 @@ static struct passwd* get_user_info(cons
   return result;
 }
 
+int is_whitelisted(const char *user) {
+  char **whitelist = get_values(ALLOWED_SYSTEM_USERS_KEY);
+  char **users = whitelist;
+  if (whitelist != NULL) {
+    for(; *users; ++users) {
+      if (strncmp(*users, user, LOGIN_NAME_MAX) == 0) {
+        free_values(whitelist);
+        return 1;
+      }
+    }
+    free_values(whitelist);
+  }
+  return 0;
+}
+
 /**
  * Is the user a real user account?
  * Checks:
@@ -526,9 +542,9 @@ struct passwd* check_user(const char *us
     fflush(LOGFILE);
     return NULL;
   }
-  if (user_info->pw_uid < min_uid) {
-    fprintf(LOGFILE, "Requested user %s has id %d, which is below the "
-	    "minimum allowed %d\n", user, user_info->pw_uid, min_uid);
+  if (user_info->pw_uid < min_uid && !is_whitelisted(user)) {
+    fprintf(LOGFILE, "Requested user %s is not whitelisted and has id %d,"
+	    "which is below the minimum allowed %d\n", user, user_info->pw_uid, min_uid);
     fflush(LOGFILE);
     free(user_info);
     return NULL;
@@ -735,28 +751,11 @@ int initialize_user(const char *user, ch
   return failed ? INITIALIZE_USER_FAILED : 0;
 }
 
-/**
- * Function to prepare the application directories for the container.
- */
-int initialize_app(const char *user, const char *app_id,
-                   const char* nmPrivate_credentials_file,
-                   char* const* local_dirs, char* const* log_roots,
-                   char* const* args) {
-  if (app_id == NULL || user == NULL) {
-    fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n");
-    return INVALID_ARGUMENT_NUMBER;
-  }
+int create_log_dirs(const char *app_id, char * const * log_dirs) {
 
-  // create the user directory on all disks
-  int result = initialize_user(user, local_dirs);
-  if (result != 0) {
-    return result;
-  }
-
-  ////////////// create the log directories for the app on all disks
   char* const* log_root;
   char *any_one_app_log_dir = NULL;
-  for(log_root=log_roots; *log_root != NULL; ++log_root) {
+  for(log_root=log_dirs; *log_root != NULL; ++log_root) {
     char *app_log_dir = get_app_log_directory(*log_root, app_id);
     if (app_log_dir == NULL) {
       // try the next one
@@ -775,7 +774,33 @@ int initialize_app(const char *user, con
     return -1;
   }
   free(any_one_app_log_dir);
-  ////////////// End of creating the log directories for the app on all disks
+  return 0;
+}
+
+
+/**
+ * Function to prepare the application directories for the container.
+ */
+int initialize_app(const char *user, const char *app_id,
+                   const char* nmPrivate_credentials_file,
+                   char* const* local_dirs, char* const* log_roots,
+                   char* const* args) {
+  if (app_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) {
+    fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n");
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  // create the user directory on all disks
+  int result = initialize_user(user, local_dirs);
+  if (result != 0) {
+    return result;
+  }
+
+  // create the log directories for the app on all disks
+  int log_create_result = create_log_dirs(app_id, log_roots);
+  if (log_create_result != 0) {
+    return log_create_result;
+  }
 
   // open up the credentials file
   int cred_file = open_file_as_nm(nmPrivate_credentials_file);
@@ -906,18 +931,34 @@ int launch_container_as_user(const char 
     }
   }
 
+  // create the user directory on all disks
+  int result = initialize_user(user, local_dirs);
+  if (result != 0) {
+    return result;
+  }
+
+  // initializing log dirs
+  int log_create_result = create_log_dirs(app_id, log_dirs);
+  if (log_create_result != 0) {
+    return log_create_result;
+  }
+
   // give up root privs
   if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
     exit_code = SETUID_OPER_FAILED;
     goto cleanup;
   }
 
+  // Create container specific directories as user. If there are no resources
+  // to localize for this container, app-directories and log-directories are
+  // also created automatically as part of this call.
   if (create_container_directories(user, app_id, container_id, local_dirs,
                                    log_dirs, work_dir) != 0) {
     fprintf(LOGFILE, "Could not create container dirs");
     goto cleanup;
   }
 
+
   // 700
   if (copy_file(container_file_source, script_name, script_file_dest,S_IRWXU) != 0) {
     goto cleanup;

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h Wed Oct 30 22:21:59 2013
@@ -65,6 +65,7 @@ enum errorcodes {
 #define CREDENTIALS_FILENAME "container_tokens"
 #define MIN_USERID_KEY "min.user.id"
 #define BANNED_USERS_KEY "banned.users"
+#define ALLOWED_SYSTEM_USERS_KEY "allowed.system.users"
 #define TMP_DIR "tmp"
 
 extern struct passwd *user_detail;

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c Wed Oct 30 22:21:59 2013
@@ -49,7 +49,7 @@ void display_usage(FILE *stream) {
           "Usage: container-executor --mount-cgroups "\
           "hierarchy controller=path...\n");
   fprintf(stream,
-      "Usage: container-executor user command command-args\n");
+      "Usage: container-executor user yarn-user command command-args\n");
   fprintf(stream, "Commands:\n");
   fprintf(stream, "   initialize container: %2d appid tokens " \
    "nm-local-dirs nm-log-dirs cmd app...\n", INITIALIZE_CONTAINER);
@@ -178,18 +178,29 @@ int main(int argc, char **argv) {
   if (ret != 0) {
     return ret;
   }
+
+  // this string is used for building pathnames, the
+  // process management is done based on the 'user_detail'
+  // global, which was set by 'set_user()' above
+  optind = optind + 1;
+  char *yarn_user_name = argv[optind];
+  if (yarn_user_name == NULL) {
+    fprintf(ERRORFILE, "Invalid yarn user name.\n");
+    return INVALID_USER_NAME;
+  }
  
   optind = optind + 1;
   command = atoi(argv[optind++]);
 
   fprintf(LOGFILE, "main : command provided %d\n",command);
   fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name);
+  fprintf(LOGFILE, "main : requested yarn user is %s\n", yarn_user_name);
   fflush(LOGFILE);
 
   switch (command) {
   case INITIALIZE_CONTAINER:
-    if (argc < 8) {
-      fprintf(ERRORFILE, "Too few arguments (%d vs 8) for initialize container\n",
+    if (argc < 9) {
+      fprintf(ERRORFILE, "Too few arguments (%d vs 9) for initialize container\n",
 	      argc);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
@@ -198,13 +209,13 @@ int main(int argc, char **argv) {
     cred_file = argv[optind++];
     local_dirs = argv[optind++];// good local dirs as a comma separated list
     log_dirs = argv[optind++];// good log dirs as a comma separated list
-    exit_code = initialize_app(user_detail->pw_name, app_id, cred_file,
+    exit_code = initialize_app(yarn_user_name, app_id, cred_file,
                                extract_values(local_dirs),
                                extract_values(log_dirs), argv + optind);
     break;
   case LAUNCH_CONTAINER:
-    if (argc != 12) {
-      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 12) for launch container\n",
+    if (argc != 13) {
+      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 13) for launch container\n",
 	      argc);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
@@ -230,7 +241,7 @@ int main(int argc, char **argv) {
         return INVALID_ARGUMENT_NUMBER;
     }
     char** resources_values = extract_values(resources_value);
-    exit_code = launch_container_as_user(user_detail->pw_name, app_id,
+    exit_code = launch_container_as_user(yarn_user_name, app_id,
                     container_id, current_dir, script_file, cred_file,
                     pid_file, extract_values(local_dirs),
                     extract_values(log_dirs), resources_key,
@@ -239,8 +250,8 @@ int main(int argc, char **argv) {
     free(resources_value);
     break;
   case SIGNAL_CONTAINER:
-    if (argc != 5) {
-      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 5) for " \
+    if (argc != 6) {
+      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 6) for " \
           "signal container\n", argc);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
@@ -260,12 +271,12 @@ int main(int argc, char **argv) {
         fflush(ERRORFILE);
         return INVALID_ARGUMENT_NUMBER;
       }
-      exit_code = signal_container_as_user(user_detail->pw_name, container_pid, signal);
+      exit_code = signal_container_as_user(yarn_user_name, container_pid, signal);
     }
     break;
   case DELETE_AS_USER:
     dir_to_be_deleted = argv[optind++];
-    exit_code= delete_as_user(user_detail->pw_name, dir_to_be_deleted,
+    exit_code= delete_as_user(yarn_user_name, dir_to_be_deleted,
                               argv + optind);
     break;
   default:

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c Wed Oct 30 22:21:59 2013
@@ -32,14 +32,13 @@
 #define DONT_TOUCH_FILE "dont-touch-me"
 #define NM_LOCAL_DIRS       TEST_ROOT "/local-1," TEST_ROOT "/local-2," \
                TEST_ROOT "/local-3," TEST_ROOT "/local-4," TEST_ROOT "/local-5"
-#define NM_LOG_DIRS         TEST_ROOT "/logdir_1," TEST_ROOT "/logdir_2," \
-                            TEST_ROOT "/logdir_3," TEST_ROOT "/logdir_4"
+#define NM_LOG_DIRS         TEST_ROOT "/logs/userlogs"
 #define ARRAY_SIZE 1000
 
 static char* username = NULL;
-static char* local_dirs = NULL;
-static char* log_dirs = NULL;
-static char* resources = NULL;
+static char* yarn_username = NULL;
+static char** local_dirs = NULL;
+static char** log_dirs = NULL;
 
 /**
  * Run the command using the effective user id.
@@ -99,6 +98,7 @@ int write_config_file(char *file_name) {
   }
   fprintf(file, "banned.users=bannedUser\n");
   fprintf(file, "min.user.id=500\n");
+  fprintf(file, "allowed.system.users=allowedUser,bin\n");
   fclose(file);
   return 0;
 }
@@ -121,6 +121,33 @@ void create_nm_roots(char ** nm_roots) {
   }
 }
 
+void check_pid_file(const char* pid_file, pid_t mypid) {
+  if(access(pid_file, R_OK) != 0) {
+    printf("FAIL: failed to create pid file %s\n", pid_file);
+    exit(1);
+  }
+  int pidfd = open(pid_file, O_RDONLY);
+  if (pidfd == -1) {
+    printf("FAIL: failed to open pid file %s - %s\n", pid_file, strerror(errno));
+    exit(1);
+  }
+
+  char pidBuf[100];
+  ssize_t bytes = read(pidfd, pidBuf, 100);
+  if (bytes == -1) {
+    printf("FAIL: failed to read from pid file %s - %s\n", pid_file, strerror(errno));
+    exit(1);
+  }
+
+  char myPidBuf[33];
+  snprintf(myPidBuf, 33, "%d", mypid);
+  if (strncmp(pidBuf, myPidBuf, strlen(myPidBuf)) != 0) {
+    printf("FAIL: failed to find matching pid in pid file\n");
+    printf("FAIL: Expected pid %d : Got %.*s", mypid, (int)bytes, pidBuf);
+    exit(1);
+  }
+}
+
 void test_get_user_directory() {
   char *user_dir = get_user_directory("/tmp", "user");
   char *expected = "/tmp/usercache/user";
@@ -195,6 +222,10 @@ void test_check_user() {
     printf("FAIL: failed check for system user root\n");
     exit(1);
   }
+  if (check_user("bin") == NULL) {
+    printf("FAIL: failed check for whitelisted system user bin\n");
+    exit(1);
+  }
 }
 
 void test_resolve_config_path() {
@@ -222,15 +253,15 @@ void test_check_configuration_permission
 }
 
 void test_delete_container() {
-  if (initialize_user(username, extract_values(local_dirs))) {
-    printf("FAIL: failed to initialize user %s\n", username);
+  if (initialize_user(yarn_username, local_dirs)) {
+    printf("FAIL: failed to initialize user %s\n", yarn_username);
     exit(1);
   }
-  char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_1");
-  char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, 
+  char* app_dir = get_app_directory(TEST_ROOT "/local-2", yarn_username, "app_1");
+  char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, 
                                        DONT_TOUCH_FILE);
   char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", 
-					      username, "app_1", "container_1");
+					      yarn_username, "app_1", "container_1");
   char buffer[100000];
   sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir);
   run(buffer);
@@ -257,7 +288,7 @@ void test_delete_container() {
 
   // delete container directory
   char * dirs[] = {app_dir, 0};
-  int ret = delete_as_user(username, "container_1" , dirs);
+  int ret = delete_as_user(yarn_username, "container_1" , dirs);
   if (ret != 0) {
     printf("FAIL: return code from delete_as_user is %d\n", ret);
     exit(1);
@@ -288,11 +319,11 @@ void test_delete_container() {
 }
 
 void test_delete_app() {
-  char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_2");
-  char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, 
+  char* app_dir = get_app_directory(TEST_ROOT "/local-2", yarn_username, "app_2");
+  char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, 
                                        DONT_TOUCH_FILE);
   char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", 
-					      username, "app_2", "container_1");
+					      yarn_username, "app_2", "container_1");
   char buffer[100000];
   sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir);
   run(buffer);
@@ -318,7 +349,7 @@ void test_delete_app() {
   run(buffer);
 
   // delete container directory
-  int ret = delete_as_user(username, app_dir, NULL);
+  int ret = delete_as_user(yarn_username, app_dir, NULL);
   if (ret != 0) {
     printf("FAIL: return code from delete_as_user is %d\n", ret);
     exit(1);
@@ -347,17 +378,17 @@ void test_delete_app() {
 
 void test_delete_user() {
   printf("\nTesting delete_user\n");
-  char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_3");
+  char* app_dir = get_app_directory(TEST_ROOT "/local-1", yarn_username, "app_3");
   if (mkdirs(app_dir, 0700) != 0) {
     exit(1);
   }
   char buffer[100000];
-  sprintf(buffer, "%s/local-1/usercache/%s", TEST_ROOT, username);
+  sprintf(buffer, "%s/local-1/usercache/%s", TEST_ROOT, yarn_username);
   if (access(buffer, R_OK) != 0) {
     printf("FAIL: directory missing before test\n");
     exit(1);
   }
-  if (delete_as_user(username, buffer, NULL) != 0) {
+  if (delete_as_user(yarn_username, buffer, NULL) != 0) {
     exit(1);
   }
   if (access(buffer, R_OK) == 0) {
@@ -416,7 +447,7 @@ void test_signal_container() {
     exit(0);
   } else {
     printf("Child container launched as %d\n", child);
-    if (signal_container_as_user(username, child, SIGQUIT) != 0) {
+    if (signal_container_as_user(yarn_username, child, SIGQUIT) != 0) {
       exit(1);
     }
     int status = 0;
@@ -453,7 +484,10 @@ void test_signal_container_group() {
     exit(0);
   }
   printf("Child container launched as %d\n", child);
-  if (signal_container_as_user(username, child, SIGKILL) != 0) {
+  // there's a race condition for child calling change_user and us 
+  // calling signal_container_as_user, hence sleeping
+  sleep(3);
+  if (signal_container_as_user(yarn_username, child, SIGKILL) != 0) {
     exit(1);
   }
   int status = 0;
@@ -517,8 +551,8 @@ void test_init_app() {
     exit(1);
   } else if (child == 0) {
     char *final_pgm[] = {"touch", "my-touch-file", 0};
-    if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", final_pgm,
-        extract_values(local_dirs), extract_values(log_dirs)) != 0) {
+    if (initialize_app(yarn_username, "app_4", TEST_ROOT "/creds.txt",
+                       local_dirs, log_dirs, final_pgm) != 0) {
       printf("FAIL: failed in child\n");
       exit(42);
     }
@@ -535,13 +569,13 @@ void test_init_app() {
     printf("FAIL: failed to create app log directory\n");
     exit(1);
   }
-  char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_4");
+  char* app_dir = get_app_directory(TEST_ROOT "/local-1", yarn_username, "app_4");
   if (access(app_dir, R_OK) != 0) {
     printf("FAIL: failed to create app directory %s\n", app_dir);
     exit(1);
   }
   char buffer[100000];
-  sprintf(buffer, "%s/jobToken", app_dir);
+  sprintf(buffer, "%s/creds.txt", app_dir);
   if (access(buffer, R_OK) != 0) {
     printf("FAIL: failed to create credentials %s\n", buffer);
     exit(1);
@@ -552,7 +586,7 @@ void test_init_app() {
     exit(1);
   }
   free(app_dir);
-  app_dir = get_app_log_directory("logs","app_4");
+  app_dir = get_app_log_directory(TEST_ROOT "/logs/userlogs","app_4");
   if (access(app_dir, R_OK) != 0) {
     printf("FAIL: failed to create app log directory %s\n", app_dir);
     exit(1);
@@ -580,6 +614,10 @@ void test_run_container() {
     exit(1);
   }
 
+  char * cgroups_pids[] = { TEST_ROOT "/cgroups-pid1.txt", TEST_ROOT "/cgroups-pid2.txt", 0 };
+  close(creat(cgroups_pids[0], O_RDWR));
+  close(creat(cgroups_pids[1], O_RDWR));
+
   const char* script_name = TEST_ROOT "/container-script";
   FILE* script = fopen(script_name, "w");
   if (script == NULL) {
@@ -603,25 +641,19 @@ void test_run_container() {
   fflush(stdout);
   fflush(stderr);
   char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", 
-					      username, "app_4", "container_1");
+					      yarn_username, "app_4", "container_1");
   const char * pid_file = TEST_ROOT "/pid.txt";
+
   pid_t child = fork();
   if (child == -1) {
     printf("FAIL: failed to fork process for init_app - %s\n", 
 	   strerror(errno));
     exit(1);
   } else if (child == 0) {
-    char *key = malloc(strlen(resources));
-    char *value = malloc(strlen(resources));
-    if (get_kv_key(resources, key, strlen(resources)) < 0 ||
-        get_kv_value(resources, key, strlen(resources)) < 0) {
-        printf("FAIL: resources failed - %s\n");
-        exit(1);
-    }
-    if (launch_container_as_user(username, "app_4", "container_1", 
+    if (launch_container_as_user(yarn_username, "app_4", "container_1", 
           container_dir, script_name, TEST_ROOT "/creds.txt", pid_file,
-          extract_values(local_dirs), extract_values(log_dirs),
-          key, extract_values(value)) != 0) {
+          local_dirs, log_dirs,
+          "cgroups", cgroups_pids) != 0) {
       printf("FAIL: failed in child\n");
       exit(42);
     }
@@ -649,44 +681,39 @@ void test_run_container() {
     exit(1);
   }
   free(container_dir);
-  container_dir = get_app_log_directory("logs", "app_4/container_1");
+  container_dir = get_app_log_directory(TEST_ROOT "/logs/userlogs", "app_4/container_1");
   if (access(container_dir, R_OK) != 0) {
     printf("FAIL: failed to create app log directory %s\n", container_dir);
     exit(1);
   }
   free(container_dir);
 
-  if(access(pid_file, R_OK) != 0) {
-    printf("FAIL: failed to create pid file %s\n", pid_file);
-    exit(1);
-  }
-  int pidfd = open(pid_file, O_RDONLY);
-  if (pidfd == -1) {
-    printf("FAIL: failed to open pid file %s - %s\n", pid_file, strerror(errno));
-    exit(1);
-  }
-
-  char pidBuf[100];
-  ssize_t bytes = read(pidfd, pidBuf, 100);
-  if (bytes == -1) {
-    printf("FAIL: failed to read from pid file %s - %s\n", pid_file, strerror(errno));
-    exit(1);
-  }
-
-  pid_t mypid = child;
-  char myPidBuf[33];
-  snprintf(myPidBuf, 33, "%d", mypid);
-  if (strncmp(pidBuf, myPidBuf, strlen(myPidBuf)) != 0) {
-    printf("FAIL: failed to find matching pid in pid file\n");
-    printf("FAIL: Expected pid %d : Got %.*s", mypid, (int)bytes, pidBuf);
+  if (seteuid(0) != 0) {
+    printf("FAIL: seteuid to root failed - %s\n", strerror(errno));
     exit(1);
   }
-}
 
+  check_pid_file(pid_file, child);
+  check_pid_file(cgroups_pids[0], child);
+  check_pid_file(cgroups_pids[1], child);
+}
+
+// This test is expected to be executed either by a regular
+// user or by root. If executed by a regular user it doesn't
+// test all the functions that would depend on changing the
+// effective user id. If executed by a super-user everything
+// gets tested. Here are different ways of execing the test binary:
+// 1. regular user assuming user == yarn user
+//    $ test-container-executor     
+// 2. regular user with a given yarn user
+//    $ test-container-executor yarn_user
+// 3. super user with a given user and assuming user == yarn user
+//    # test-container-executor user
+// 4. super user with a given user and a given yarn user
+//    # test-container-executor user yarn_user
 int main(int argc, char **argv) {
   LOGFILE = stdout;
   ERRORFILE = stderr;
-  int my_username = 0;
 
   // clean up any junk from previous run
   if (system("chmod -R u=rwx " TEST_ROOT "; rm -fr " TEST_ROOT)) {
@@ -702,18 +729,20 @@ int main(int argc, char **argv) {
   }
   read_config(TEST_ROOT "/test.cfg");
 
-  local_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE);
-  strcpy(local_dirs, NM_LOCAL_DIRS);
-  log_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE);
-  strcpy(log_dirs, NM_LOG_DIRS);
+  local_dirs = extract_values(strdup(NM_LOCAL_DIRS));
+  log_dirs = extract_values(strdup(NM_LOG_DIRS));
 
-  create_nm_roots(extract_values(local_dirs));
+  create_nm_roots(local_dirs);
 
-  if (getuid() == 0 && argc == 2) {
+  // See the description above of various ways this test
+  // can be executed in order to understand the following logic
+  char* current_username = strdup(getpwuid(getuid())->pw_name);
+  if (getuid() == 0 && (argc == 2 || argc == 3)) {
     username = argv[1];
+    yarn_username = (argc == 3) ? argv[2] : argv[1];
   } else {
-    username = strdup(getpwuid(getuid())->pw_name);
-    my_username = 1;
+    username = current_username;
+    yarn_username = (argc == 2) ? argv[1] : current_username;
   }
   set_nm_uid(geteuid(), getegid());
 
@@ -749,8 +778,6 @@ int main(int argc, char **argv) {
   printf("\nTesting delete_app()\n");
   test_delete_app();
 
-  test_delete_user();
-
   test_check_user();
 
   // the tests that change user need to be run in a subshell, so that
@@ -767,12 +794,13 @@ int main(int argc, char **argv) {
   }
 
   seteuid(0);
+  // test_delete_user must run as root since that's how we use the delete_as_user
+  test_delete_user();
+
   run("rm -fr " TEST_ROOT);
   printf("\nFinished tests\n");
 
-  if (my_username) {
-    free(username);
-  }
+  free(current_username);
   free_configurations();
   return 0;
 }

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/LocalizationProtocol.proto
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/LocalizationProtocol.proto?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/LocalizationProtocol.proto (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/LocalizationProtocol.proto Wed Oct 30 22:21:59 2013
@@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop
 option java_outer_classname = "LocalizationProtocol";
 option java_generic_services = true;
 option java_generate_equals_and_hash = true;
+package hadoop.yarn;
 
 import "yarn_server_nodemanager_service_protos.proto";
 

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto Wed Oct 30 22:21:59 2013
@@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop
 option java_outer_classname = "YarnServerNodemanagerServiceProtos";
 option java_generic_services = true;
 option java_generate_equals_and_hash = true;
+package hadoop.yarn;
 
 import "yarn_protos.proto";
 

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java Wed Oct 30 22:21:59 2013
@@ -145,7 +145,7 @@ public class DummyContainerManager exten
   protected ContainersLauncher createContainersLauncher(Context context,
       ContainerExecutor exec) {
     return new ContainersLauncher(context, super.dispatcher, exec,
-                                  super.dirsHandler) {
+                                  super.dirsHandler, this) {
       @Override
       public void handle(ContainersLauncherEvent event) {
         Container container = event.getContainer();

Modified: hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java?rev=1537330&r1=1537329&r2=1537330&view=diff
==============================================================================
--- hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java (original)
+++ hadoop/common/branches/YARN-321/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java Wed Oct 30 22:21:59 2013
@@ -178,6 +178,17 @@ public class TestContainerManagerWithLCE
     super.testMultipleContainersStopAndGetStatus();
   }
 
+  @Override
+  public void testStartContainerFailureWithUnknownAuxService() throws Exception {
+    // Don't run the test if the binary is not available.
+    if (!shouldRunTest()) {
+      LOG.info("LCE binary path is not passed. Not running the test");
+      return;
+    }
+    LOG.info("Running testContainerLaunchFromPreviousRM");
+    super.testStartContainerFailureWithUnknownAuxService();
+  }
+
   private boolean shouldRunTest() {
     return System
         .getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null;



Mime
View raw message