hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject [30/51] [abbrv] hadoop git commit: YARN-7137. [YARN-3926] Move newly added APIs to unstable in YARN-3926 branch. Contributed by Wangda Tan.
Date Wed, 02 May 2018 20:41:01 GMT
YARN-7137. [YARN-3926] Move newly added APIs to unstable in YARN-3926 branch. Contributed by Wangda Tan.

(cherry picked from commit da0b6a354bf6f6bf37ca5a05a4a8eece09aa4893)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/15319c79
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/15319c79
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/15319c79

Branch: refs/heads/YARN-8200
Commit: 15319c79a6e67073246b6efff7527b6879a40fc7
Parents: 0406adc
Author: Sunil G <sunilg@apache.org>
Authored: Tue Sep 12 20:31:47 2017 +0530
Committer: Konstantin V Shvachko <kshvachko@linkedin.com>
Committed: Wed May 2 13:19:58 2018 -0700

----------------------------------------------------------------------
 .../hadoop/yarn/api/records/Resource.java       |  24 +-
 .../yarn/api/records/ResourceInformation.java   |  12 +-
 .../yarn/api/records/ResourceRequest.java       |   1 +
 .../hadoop/yarn/conf/YarnConfiguration.java     |  33 ++
 .../yarn/util/resource/ResourceUtils.java       |  70 +--
 .../hadoop/yarn/util/resource/package-info.java |   6 +-
 .../src/main/resources/yarn-default.xml         |  48 +-
 .../yarn/util/resource/TestResourceUtils.java   |  17 +
 .../server/nodemanager/ContainerExecutor.java   |   3 +-
 .../hadoop/yarn/server/nodemanager/Context.java |   3 +
 .../nodemanager/DefaultContainerExecutor.java   |   2 +-
 .../nodemanager/LinuxContainerExecutor.java     |  10 +-
 .../yarn/server/nodemanager/NodeManager.java    |  92 ++--
 .../nodemanager/NodeStatusUpdaterImpl.java      |  38 +-
 .../linux/privileged/PrivilegedOperation.java   |   1 +
 .../linux/resources/ResourceHandlerChain.java   |   4 +-
 .../linux/resources/ResourceHandlerModule.java  |  42 +-
 .../resources/gpu/GpuResourceAllocator.java     | 242 ++++++++
 .../resources/gpu/GpuResourceHandlerImpl.java   | 153 ++++++
 .../NodeResourceUpdaterPlugin.java              |  52 ++
 .../resourceplugin/ResourcePlugin.java          |  83 +++
 .../resourceplugin/ResourcePluginManager.java   | 106 ++++
 .../resourceplugin/gpu/GpuDiscoverer.java       | 254 +++++++++
 .../gpu/GpuNodeResourceUpdateHandler.java       |  66 +++
 .../resourceplugin/gpu/GpuResourcePlugin.java   |  61 +++
 .../webapp/dao/gpu/GpuDeviceInformation.java    |  72 +++
 .../dao/gpu/GpuDeviceInformationParser.java     |  87 +++
 .../webapp/dao/gpu/PerGpuDeviceInformation.java | 165 ++++++
 .../webapp/dao/gpu/PerGpuMemoryUsage.java       |  58 ++
 .../webapp/dao/gpu/PerGpuTemperature.java       |  80 +++
 .../webapp/dao/gpu/PerGpuUtilizations.java      |  50 ++
 .../server/nodemanager/NodeManagerTestBase.java | 164 ++++++
 .../TestDefaultContainerExecutor.java           |   4 +-
 .../nodemanager/TestLinuxContainerExecutor.java |   2 +-
 .../TestLinuxContainerExecutorWithMocks.java    |   2 +-
 .../server/nodemanager/TestNodeManager.java     |   2 +-
 .../nodemanager/TestNodeStatusUpdater.java      | 100 +---
 .../amrmproxy/BaseAMRMProxyTest.java            |  46 +-
 .../resources/TestResourceHandlerModule.java    |   8 +-
 .../resources/gpu/TestGpuResourceHandler.java   | 382 +++++++++++++
 .../TestContainersMonitorResourceChange.java    |   2 +-
 .../TestResourcePluginManager.java              | 261 +++++++++
 .../resourceplugin/gpu/TestGpuDiscoverer.java   | 123 +++++
 .../dao/gpu/TestGpuDeviceInformationParser.java |  50 ++
 .../test/resources/nvidia-smi-sample-xml-output | 547 +++++++++++++++++++
 .../resourcemanager/webapp/dao/AppInfo.java     |   2 +-
 46 files changed, 3385 insertions(+), 245 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
index 37b50f2..9a5bc79 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
@@ -206,8 +206,8 @@ public abstract class Resource implements Comparable<Resource> {
    *
    * @return Map of resource name to ResourceInformation
    */
-  @Public
-  @Evolving
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
   public ResourceInformation[] getResources() {
     return resources;
   }
@@ -220,7 +220,7 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException if the resource can't be found
    */
   @Public
-  @Evolving
+  @InterfaceStability.Unstable
   public ResourceInformation getResourceInformation(String resource)
       throws ResourceNotFoundException {
     Integer index = ResourceUtils.getResourceTypeIndex().get(resource);
@@ -240,8 +240,8 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException
    *           if the resource can't be found
    */
-  @Public
-  @Evolving
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
   public ResourceInformation getResourceInformation(int index)
       throws ResourceNotFoundException {
     ResourceInformation ri = null;
@@ -262,7 +262,7 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException if the resource can't be found
    */
   @Public
-  @Evolving
+  @InterfaceStability.Unstable
   public long getResourceValue(String resource)
       throws ResourceNotFoundException {
     return getResourceInformation(resource).getValue();
@@ -276,7 +276,7 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException if the resource is not found
    */
   @Public
-  @Evolving
+  @InterfaceStability.Unstable
   public void setResourceInformation(String resource,
       ResourceInformation resourceInformation)
       throws ResourceNotFoundException {
@@ -302,8 +302,8 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException
    *           if the resource is not found
    */
-  @Public
-  @Evolving
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
   public void setResourceInformation(int index,
       ResourceInformation resourceInformation)
       throws ResourceNotFoundException {
@@ -323,7 +323,7 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException if the resource is not found
    */
   @Public
-  @Evolving
+  @InterfaceStability.Unstable
   public void setResourceValue(String resource, long value)
       throws ResourceNotFoundException {
     if (resource.equals(ResourceInformation.MEMORY_URI)) {
@@ -350,8 +350,8 @@ public abstract class Resource implements Comparable<Resource> {
    * @throws ResourceNotFoundException
    *           if the resource is not found
    */
-  @Public
-  @Evolving
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
   public void setResourceValue(int index, long value)
       throws ResourceNotFoundException {
     try {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
index 785b311..84b4748 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
@@ -18,11 +18,14 @@
 
 package org.apache.hadoop.yarn.api.records;
 
-import org.apache.curator.shaded.com.google.common.reflect.ClassPath;
+import com.google.common.collect.ImmutableMap;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
 import org.apache.hadoop.yarn.util.UnitsConversionUtil;
 
+import java.util.Map;
+
 /**
  * Class to encapsulate information about a Resource - the name of the resource,
  * the units(milli, micro, etc), the type(countable), and the value.
@@ -36,13 +39,20 @@ public class ResourceInformation implements Comparable<ResourceInformation> {
   private long minimumAllocation;
   private long maximumAllocation;
 
+  // Known resource types
   public static final String MEMORY_URI = "memory-mb";
   public static final String VCORES_URI = "vcores";
+  public static final String GPU_URI = "yarn.io/gpu";
 
   public static final ResourceInformation MEMORY_MB =
       ResourceInformation.newInstance(MEMORY_URI, "Mi");
   public static final ResourceInformation VCORES =
       ResourceInformation.newInstance(VCORES_URI);
+  public static final ResourceInformation GPUS =
+      ResourceInformation.newInstance(GPU_URI);
+
+  public static final Map<String, ResourceInformation> MANDATORY_RESOURCES =
+      ImmutableMap.of(MEMORY_URI, MEMORY_MB, VCORES_URI, VCORES, GPU_URI, GPUS);
 
   /**
    * Get the name for the resource.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
index e9be6c3..43a339c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.api.records;
 import java.io.Serializable;
 
 import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index ec1869f..63e46d9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1411,6 +1411,39 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT =
       NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit";
 
+  /**
+   * Prefix for computation resources, example of computation resources like
+   * GPU / FPGA / TPU, etc.
+   */
+  @Private
+  public static final String NM_RESOURCE_PLUGINS =
+      NM_PREFIX + "resource-plugins";
+
+  /**
+   * Prefix for gpu configurations. Work in progress: This configuration
+   * parameter may be changed/removed in the future.
+   */
+  @Private
+  public static final String NM_GPU_RESOURCE_PREFIX =
+      NM_RESOURCE_PLUGINS + ".gpu.";
+
+  @Private
+  public static final String NM_GPU_ALLOWED_DEVICES =
+      NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices";
+  @Private
+  public static final String AUTOMATICALLY_DISCOVER_GPU_DEVICES = "auto";
+
+  /**
+   * This setting controls where to how to invoke GPU binaries
+   */
+  @Private
+  public static final String NM_GPU_PATH_TO_EXEC =
+      NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
+
+  @Private
+  public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
+
+
   /** NM Webapp address.**/
   public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address";
   public static final int DEFAULT_NM_WEBAPP_PORT = 8042;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
index 110453a..f3edc74 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
@@ -46,11 +46,11 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
 /**
  * Helper class to read the resource-types to be supported by the system.
  */
-@InterfaceAudience.Public
-@InterfaceStability.Unstable
 public class ResourceUtils {
 
   public static final String UNITS = ".units";
@@ -65,7 +65,6 @@ public class ResourceUtils {
   private static final Map<String, Integer> RESOURCE_NAME_TO_INDEX =
       new ConcurrentHashMap<String, Integer>();
   private static volatile Map<String, ResourceInformation> resourceTypes;
-  private static volatile String[] resourceNamesArray;
   private static volatile ResourceInformation[] resourceTypesArray;
   private static volatile boolean initializedNodeResources = false;
   private static volatile Map<String, ResourceInformation> readOnlyNodeResources;
@@ -85,33 +84,32 @@ public class ResourceUtils {
      */
     String key = "memory";
     if (resourceInformationMap.containsKey(key)) {
-      LOG.warn("Attempt to define resource '" + key +
-          "', but it is not allowed.");
-      throw new YarnRuntimeException("Attempt to re-define mandatory resource '"
-          + key + "'.");
-    }
-
-    if (resourceInformationMap.containsKey(MEMORY)) {
-      ResourceInformation memInfo = resourceInformationMap.get(MEMORY);
-      String memUnits = ResourceInformation.MEMORY_MB.getUnits();
-      ResourceTypes memType = ResourceInformation.MEMORY_MB.getResourceType();
-      if (!memInfo.getUnits().equals(memUnits) || !memInfo.getResourceType()
-          .equals(memType)) {
-        throw new YarnRuntimeException(
-            "Attempt to re-define mandatory resource 'memory-mb'. It can only"
-                + " be of type 'COUNTABLE' and have units 'Mi'.");
-      }
+      LOG.warn(
+          "Attempt to define resource '" + key + "', but it is not allowed.");
+      throw new YarnRuntimeException(
+          "Attempt to re-define mandatory resource '" + key + "'.");
     }
 
-    if (resourceInformationMap.containsKey(VCORES)) {
-      ResourceInformation vcoreInfo = resourceInformationMap.get(VCORES);
-      String vcoreUnits = ResourceInformation.VCORES.getUnits();
-      ResourceTypes vcoreType = ResourceInformation.VCORES.getResourceType();
-      if (!vcoreInfo.getUnits().equals(vcoreUnits) || !vcoreInfo
-          .getResourceType().equals(vcoreType)) {
-        throw new YarnRuntimeException(
-            "Attempt to re-define mandatory resource 'vcores'. It can only be"
-                + " of type 'COUNTABLE' and have units ''(no units).");
+    for (Map.Entry<String, ResourceInformation> mandatoryResourceEntry :
+        ResourceInformation.MANDATORY_RESOURCES.entrySet()) {
+      key = mandatoryResourceEntry.getKey();
+      ResourceInformation mandatoryRI = mandatoryResourceEntry.getValue();
+
+      ResourceInformation newDefinedRI = resourceInformationMap.get(key);
+      if (newDefinedRI != null) {
+        String expectedUnit = mandatoryRI.getUnits();
+        ResourceTypes expectedType = mandatoryRI.getResourceType();
+        String actualUnit = newDefinedRI.getUnits();
+        ResourceTypes actualType = newDefinedRI.getResourceType();
+
+        if (!expectedUnit.equals(actualUnit) || !expectedType.equals(
+            actualType)) {
+          throw new YarnRuntimeException("Defined mandatory resource type="
+              + key + " inside resource-types.xml, however its type or "
+              + "unit is conflict to mandatory resource types, expected type="
+              + expectedType + ", unit=" + expectedUnit + "; actual type="
+              + actualType + " actual unit=" + actualUnit);
+        }
       }
     }
   }
@@ -270,7 +268,6 @@ public class ResourceUtils {
 
   private static void updateKnownResources() {
     // Update resource names.
-    resourceNamesArray = new String[resourceTypes.size()];
     resourceTypesArray = new ResourceInformation[resourceTypes.size()];
 
     int index = 2;
@@ -278,14 +275,11 @@ public class ResourceUtils {
       if (resInfo.getName().equals(MEMORY)) {
         resourceTypesArray[0] = ResourceInformation
             .newInstance(resourceTypes.get(MEMORY));
-        resourceNamesArray[0] = MEMORY;
       } else if (resInfo.getName().equals(VCORES)) {
         resourceTypesArray[1] = ResourceInformation
             .newInstance(resourceTypes.get(VCORES));
-        resourceNamesArray[1] = VCORES;
       } else {
         resourceTypesArray[index] = ResourceInformation.newInstance(resInfo);
-        resourceNamesArray[index] = resInfo.getName();
         index++;
       }
     }
@@ -319,18 +313,6 @@ public class ResourceUtils {
         YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
   }
 
-  /**
-   * Get resource names array, this is mostly for performance perspective. Never
-   * modify returned array.
-   *
-   * @return resourceNamesArray
-   */
-  public static String[] getResourceNamesArray() {
-    initializeResourceTypesIfNeeded(null,
-        YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
-    return resourceNamesArray;
-  }
-
   public static ResourceInformation[] getResourceTypesArray() {
     initializeResourceTypesIfNeeded(null,
         YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
index 1e925d7..d7c799d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
@@ -19,8 +19,4 @@
  * Package org.apache.hadoop.yarn.util.resource contains classes
  * which is used as utility class for resource profile computations.
  */
-@InterfaceAudience.Public
-@InterfaceStability.Unstable
-package org.apache.hadoop.yarn.util.resource;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
\ No newline at end of file
+package org.apache.hadoop.yarn.util.resource;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 768deb2..20442d9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -3381,6 +3381,16 @@
     <value>0.0.0.0:8091</value>
   </property>
 
+  <!-- resource types configuration -->
+  <property>
+    <name>yarn.resource-types</name>
+    <value></value>
+    <description>
+    The resource types to be used for scheduling. Use resource-types.xml
+    to specify details about the individual resource types.
+    </description>
+  </property>
+ 
   <property>
     <name>yarn.resourcemanager.display.per-user-apps</name>
     <value>false</value>
@@ -3481,11 +3491,43 @@
 
   <!-- resource types configuration -->
   <property>
-    <name>yarn.resource-types</name>
+    <description>
+      When yarn.nodemanager.resource.gpu.allowed-gpu-devices=auto specified,
+      YARN NodeManager needs to run GPU discovery binary (now only support
+      nvidia-smi) to get GPU-related information.
+      When value is empty (default), YARN NodeManager will try to locate
+      discovery executable itself.
+      An example of the config value is: /usr/local/bin/nvidia-smi
+    </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
     <value></value>
+  </property>
+
+  <property>
     <description>
-    The resource types to be used for scheduling. Use resource-types.xml
-    to specify details about the individual resource types.
+      Enable additional discovery/isolation of resources on the NodeManager,
+      split by comma. By default, this is empty. Acceptable values: { "yarn-io/gpu" }.
+    </description>
+    <name>yarn.nodemanager.resource-plugins</name>
+    <value></value>
+  </property>
+
+  <property>
+    <description>
+      Specify GPU devices which can be managed by YARN NodeManager, split by comma
+      Number of GPU devices will be reported to RM to make scheduling decisions.
+      Set to auto (default) let YARN automatically discover GPU resource from
+      system.
+      Manually specify GPU devices if auto detect GPU device failed or admin
+      only want subset of GPU devices managed by YARN. GPU device is identified
+      by their minor device number. A common approach to get minor device number
+      of GPUs is using "nvidia-smi -q" and search "Minor Number" output. An
+      example of manual specification is "0,1,2,4" to allow YARN NodeManager
+      to manage GPU devices with minor number 0/1/2/4.
     </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices</name>
+    <value>auto</value>
   </property>
+
+
 </configuration>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
index d6bab92..80555ca 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
@@ -52,6 +52,23 @@ public class TestResourceUtils {
     }
   }
 
+  public static void addNewTypesToResources(String... resourceTypes) {
+    // Initialize resource map
+    Map<String, ResourceInformation> riMap = new HashMap<>();
+
+    // Initialize mandatory resources
+    riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
+    riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
+
+    for (String newResource : resourceTypes) {
+      riMap.put(newResource, ResourceInformation
+          .newInstance(newResource, "", 0, ResourceTypes.COUNTABLE, 0,
+              Integer.MAX_VALUE));
+    }
+
+    ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
+  }
+
   @Before
   public void setup() {
     ResourceUtils.resetResourceTypes();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 9454da4..e65b677 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -112,9 +112,10 @@ public abstract class ContainerExecutor implements Configurable {
    * Run the executor initialization steps.
    * Verify that the necessary configs and permissions are in place.
    *
+   * @param nmContext Context of NM
    * @throws IOException if initialization fails
    */
-  public abstract void init() throws IOException;
+  public abstract void init(Context nmContext) throws IOException;
 
   /**
    * This function localizes the JAR file on-demand.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index 33cefea..7e16034 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManag
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
@@ -122,4 +123,6 @@ public interface Context {
   void setNMTimelinePublisher(NMTimelinePublisher nmMetricsPublisher);
 
   NMTimelinePublisher getNMTimelinePublisher();
+
+  ResourcePluginManager getResourcePluginManager();
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
index b54b7f5..e659c3e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
@@ -134,7 +134,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   }
 
   @Override
-  public void init() throws IOException {
+  public void init(Context nmContext) throws IOException {
     // nothing to do or verify here
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 765c49a..04f27c2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -281,7 +282,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
   }
 
   @Override
-  public void init() throws IOException {
+  public void init(Context nmContext) throws IOException {
     Configuration conf = super.getConf();
 
     // Send command to executor which will just start up,
@@ -305,7 +306,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
 
     try {
       resourceHandlerChain = ResourceHandlerModule
-          .getConfiguredResourceHandlerChain(conf);
+          .getConfiguredResourceHandlerChain(conf, nmContext);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Resource handler chain enabled = " + (resourceHandlerChain
             != null));
@@ -845,4 +846,9 @@ public class LinuxContainerExecutor extends ContainerExecutor {
           e);
     }
   }
+
+  @VisibleForTesting
+  public ResourceHandler getResourceHandler() {
+    return resourceHandlerChain;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 431a894..3f27abe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -18,23 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
-import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -65,12 +49,16 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
 import org.apache.hadoop.yarn.server.api.records.AppCollectorData;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.collectormanager.NMCollectorService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider;
 import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
@@ -78,14 +66,25 @@ import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ScriptBasedNodeLabel
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
-import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer;
+import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 public class NodeManager extends CompositeService 
     implements EventHandler<NodeManagerEvent> {
@@ -331,6 +330,18 @@ public class NodeManager extends CompositeService
         nmCheckintervalTime, scriptTimeout, scriptArgs);
   }
 
+  @VisibleForTesting
+  protected ResourcePluginManager createResourcePluginManager() {
+    return new ResourcePluginManager();
+  }
+
+  @VisibleForTesting
+  protected ContainerExecutor createContainerExecutor(Configuration conf) {
+    return ReflectionUtils.newInstance(
+        conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
+            DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+  }
+
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
 
@@ -359,11 +370,20 @@ public class NodeManager extends CompositeService
     
     this.aclsManager = new ApplicationACLsManager(conf);
 
-    ContainerExecutor exec = ReflectionUtils.newInstance(
-        conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
-          DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+    boolean isDistSchedulingEnabled =
+        conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
+            YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
+
+    this.context = createNMContext(containerTokenSecretManager,
+        nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
+
+    ResourcePluginManager pluginManager = createResourcePluginManager();
+    pluginManager.initialize(context);
+    ((NMContext)context).setResourcePluginManager(pluginManager);
+
+    ContainerExecutor exec = createContainerExecutor(conf);
     try {
-      exec.init();
+      exec.init(context);
     } catch (IOException e) {
       throw new YarnRuntimeException("Failed to initialize container executor", e);
     }    
@@ -379,13 +399,6 @@ public class NodeManager extends CompositeService
             getNodeHealthScriptRunner(conf), dirsHandler);
     addService(nodeHealthChecker);
 
-    boolean isDistSchedulingEnabled =
-        conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
-            YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
-
-    this.context = createNMContext(containerTokenSecretManager,
-        nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
-
 
     ((NMContext)context).setContainerExecutor(exec);
 
@@ -459,6 +472,12 @@ public class NodeManager extends CompositeService
     try {
       super.serviceStop();
       DefaultMetricsSystem.shutdown();
+
+      // Cleanup ResourcePluginManager
+      ResourcePluginManager rpm = context.getResourcePluginManager();
+      if (rpm != null) {
+        rpm.cleanup();
+      }
     } finally {
       // YARN-3641: NM's services stop get failed shouldn't block the
       // release of NMLevelDBStore.
@@ -596,6 +615,8 @@ public class NodeManager extends CompositeService
 
     private NMTimelinePublisher nmTimelinePublisher;
 
+    private ResourcePluginManager resourcePluginManager;
+
     public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager,
         LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager,
@@ -796,6 +817,15 @@ public class NodeManager extends CompositeService
     public NMTimelinePublisher getNMTimelinePublisher() {
       return nmTimelinePublisher;
     }
+
+    public ResourcePluginManager getResourcePluginManager() {
+      return resourcePluginManager;
+    }
+
+    public void setResourcePluginManager(
+        ResourcePluginManager resourcePluginManager) {
+      this.resourcePluginManager = resourcePluginManager;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index 888ee85..d776bdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -33,6 +33,9 @@ import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -178,14 +181,15 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     long memoryMb = totalResource.getMemorySize();
     float vMemToPMem =
         conf.getFloat(
-            YarnConfiguration.NM_VMEM_PMEM_RATIO, 
-            YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); 
+            YarnConfiguration.NM_VMEM_PMEM_RATIO,
+            YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
     long virtualMemoryMb = (long)Math.ceil(memoryMb * vMemToPMem);
-    
     int virtualCores = totalResource.getVirtualCores();
-    LOG.info("Nodemanager resources: memory set to " + memoryMb + "MB.");
-    LOG.info("Nodemanager resources: vcores set to " + virtualCores + ".");
-    LOG.info("Nodemanager resources: " + totalResource);
+
+    // Update configured resources via plugins.
+    updateConfiguredResourcesViaPlugins(totalResource);
+
+    LOG.info("Nodemanager resources is set to: " + totalResource);
 
     metrics.addResource(totalResource);
 
@@ -342,12 +346,27 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     return ServerRMProxy.createRMProxy(conf, ResourceTracker.class);
   }
 
+  private void updateConfiguredResourcesViaPlugins(
+      Resource configuredResource) throws YarnException {
+    ResourcePluginManager pluginManager = context.getResourcePluginManager();
+    if (pluginManager != null && pluginManager.getNameToPlugins() != null) {
+      // Update configured resource
+      for (ResourcePlugin resourcePlugin : pluginManager.getNameToPlugins()
+          .values()) {
+        if (resourcePlugin.getNodeResourceHandlerInstance() != null) {
+          resourcePlugin.getNodeResourceHandlerInstance()
+              .updateConfiguredResource(configuredResource);
+        }
+      }
+    }
+  }
+
   @VisibleForTesting
   protected void registerWithRM()
       throws YarnException, IOException {
     RegisterNodeManagerResponse regNMResponse;
     Set<NodeLabel> nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration();
- 
+
     // Synchronize NM-RM registration with
     // ContainerManagerImpl#increaseContainersResource and
     // ContainerManagerImpl#startContainers to avoid race condition
@@ -358,6 +377,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
           RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
               nodeManagerVersionId, containerReports, getRunningApplications(),
               nodeLabels, physicalResource);
+
       if (containerReports != null) {
         LOG.info("Registering with RM using containers :" + containerReports);
       }
@@ -406,7 +426,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     if (masterKey != null) {
       this.context.getContainerTokenSecretManager().setMasterKey(masterKey);
     }
-    
+
     masterKey = regNMResponse.getNMTokenMasterKey();
     if (masterKey != null) {
       this.context.getNMTokenSecretManager().setMasterKey(masterKey);
@@ -733,7 +753,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
       }
     }
   }
-  
+
   @Override
   public long getRMIdentifier() {
     return this.rmIdentifier;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index 8402a16..db0b225 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -51,6 +51,7 @@ public class PrivilegedOperation {
     TC_READ_STATS("--tc-read-stats"),
     ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
     RUN_DOCKER_CMD("--run-docker"),
+    GPU("--module-gpu"),
     LIST_AS_USER(""); //no CLI switch supported yet.
 
     private final String option;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
index 955d216..72bf30c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
@@ -20,6 +20,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -135,7 +136,8 @@ public class ResourceHandlerChain implements ResourceHandler {
     return allOperations;
   }
 
-  List<ResourceHandler> getResourceHandlerList() {
+  @VisibleForTesting
+  public List<ResourceHandler> getResourceHandlerList() {
     return Collections.unmodifiableList(resourceHandlers);
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
index 3c61cd4..ce850ab 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
@@ -21,25 +21,28 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
 
 import com.google.common.annotations.VisibleForTesting;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
 import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 /**
  * Provides mechanisms to get various resource handlers - cpu, memory, network,
@@ -206,22 +209,41 @@ public class ResourceHandlerModule {
   }
 
   private static void initializeConfiguredResourceHandlerChain(
-      Configuration conf) throws ResourceHandlerException {
+      Configuration conf, Context nmContext)
+      throws ResourceHandlerException {
     ArrayList<ResourceHandler> handlerList = new ArrayList<>();
 
     addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
+    addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
     resourceHandlerChain = new ResourceHandlerChain(handlerList);
   }
 
+  private static void addHandlersFromConfiguredResourcePlugins(
+      List<ResourceHandler> handlerList, Configuration conf,
+      Context nmContext) throws ResourceHandlerException {
+    ResourcePluginManager pluginManager = nmContext.getResourcePluginManager();
+    if (pluginManager != null) {
+       Map<String, ResourcePlugin> pluginMap = pluginManager.getNameToPlugins();
+       if (pluginMap != null) {
+        for (ResourcePlugin plugin : pluginMap.values()) {
+          addHandlerIfNotNull(handlerList, plugin
+              .createResourceHandler(nmContext,
+                  getInitializedCGroupsHandler(conf),
+                  PrivilegedOperationExecutor.getInstance(conf)));
+        }
+      }
+    }
+  }
+
   public static ResourceHandlerChain getConfiguredResourceHandlerChain(
-      Configuration conf) throws ResourceHandlerException {
+      Configuration conf, Context nmContext) throws ResourceHandlerException {
     if (resourceHandlerChain == null) {
       synchronized (ResourceHandlerModule.class) {
         if (resourceHandlerChain == null) {
-          initializeConfiguredResourceHandlerChain(conf);
+          initializeConfiguredResourceHandlerChain(conf, nmContext);
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
new file mode 100644
index 0000000..d6bae09
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Allocate GPU resources according to requirements
+ */
+public class GpuResourceAllocator {
+  final static Log LOG = LogFactory.getLog(GpuResourceAllocator.class);
+
+  private Set<Integer> allowedGpuDevices = new TreeSet<>();
+  private Map<Integer, ContainerId> usedDevices = new TreeMap<>();
+  private Context nmContext;
+
+  public GpuResourceAllocator(Context ctx) {
+    this.nmContext = ctx;
+  }
+
+  /**
+   * Contains allowed and denied devices with minor number.
+   * Denied devices will be useful for cgroups devices module to do blacklisting
+   */
+  static class GpuAllocation {
+    private Set<Integer> allowed = Collections.emptySet();
+    private Set<Integer> denied = Collections.emptySet();
+
+    GpuAllocation(Set<Integer> allowed, Set<Integer> denied) {
+      if (allowed != null) {
+        this.allowed = ImmutableSet.copyOf(allowed);
+      }
+      if (denied != null) {
+        this.denied = ImmutableSet.copyOf(denied);
+      }
+    }
+
+    public Set<Integer> getAllowedGPUs() {
+      return allowed;
+    }
+
+    public Set<Integer> getDeniedGPUs() {
+      return denied;
+    }
+  }
+
+  /**
+   * Add GPU to allowed list
+   * @param minorNumber minor number of the GPU device.
+   */
+  public synchronized void addGpu(int minorNumber) {
+    allowedGpuDevices.add(minorNumber);
+  }
+
+  private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices,
+      ContainerId containerId) {
+    return "Failed to find enough GPUs, requestor=" + containerId
+        + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus="
+        + getAvailableGpus();
+  }
+
+  @VisibleForTesting
+  public synchronized int getAvailableGpus() {
+    return allowedGpuDevices.size() - usedDevices.size();
+  }
+
+  public synchronized void recoverAssignedGpus(ContainerId containerId)
+      throws ResourceHandlerException {
+    Container c = nmContext.getContainers().get(containerId);
+    if (null == c) {
+      throw new ResourceHandlerException(
+          "This shouldn't happen, cannot find container with id="
+              + containerId);
+    }
+
+    for (Serializable deviceId : c.getResourceMappings().getAssignedResources(
+        GPU_URI)){
+      if (!(deviceId instanceof String)) {
+        throw new ResourceHandlerException(
+            "Trying to recover device id, however it"
+                + " is not String, this shouldn't happen");
+      }
+
+
+      int devId;
+      try {
+        devId = Integer.parseInt((String)deviceId);
+      } catch (NumberFormatException e) {
+        throw new ResourceHandlerException("Failed to recover device id because"
+            + "it is not a valid integer, devId:" + deviceId);
+      }
+
+      // Make sure it is in allowed GPU device.
+      if (!allowedGpuDevices.contains(devId)) {
+        throw new ResourceHandlerException("Try to recover device id = " + devId
+            + " however it is not in allowed device list:" + StringUtils
+            .join(",", allowedGpuDevices));
+      }
+
+      // Make sure it is not occupied by anybody else
+      if (usedDevices.containsKey(devId)) {
+        throw new ResourceHandlerException("Try to recover device id = " + devId
+            + " however it is already assigned to container=" + usedDevices
+            .get(devId) + ", please double check what happened.");
+      }
+
+      usedDevices.put(devId, containerId);
+    }
+  }
+
+  private int getRequestedGpus(Resource requestedResource) {
+    try {
+      return Long.valueOf(requestedResource.getResourceValue(
+          GPU_URI)).intValue();
+    } catch (ResourceNotFoundException e) {
+      return 0;
+    }
+  }
+
+  /**
+   * Assign GPU to requestor
+   * @param container container to allocate
+   * @return List of denied Gpus with minor numbers
+   * @throws ResourceHandlerException When failed to
+   */
+  public synchronized GpuAllocation assignGpus(Container container)
+      throws ResourceHandlerException {
+    Resource requestedResource = container.getResource();
+    ContainerId containerId = container.getContainerId();
+    int numRequestedGpuDevices = getRequestedGpus(requestedResource);
+    // Assign Gpus to container if requested some.
+    if (numRequestedGpuDevices > 0) {
+      if (numRequestedGpuDevices > getAvailableGpus()) {
+        throw new ResourceHandlerException(
+            getResourceHandlerExceptionMessage(numRequestedGpuDevices,
+                containerId));
+      }
+
+      Set<Integer> assignedGpus = new HashSet<>();
+
+      for (int deviceNum : allowedGpuDevices) {
+        if (!usedDevices.containsKey(deviceNum)) {
+          usedDevices.put(deviceNum, containerId);
+          assignedGpus.add(deviceNum);
+          if (assignedGpus.size() == numRequestedGpuDevices) {
+            break;
+          }
+        }
+      }
+
+      // Record in state store if we allocated anything
+      if (!assignedGpus.isEmpty()) {
+        List<Serializable> allocatedDevices = new ArrayList<>();
+        for (int gpu : assignedGpus) {
+          allocatedDevices.add(String.valueOf(gpu));
+        }
+        try {
+          // Update Container#getResourceMapping.
+          ResourceMappings.AssignedResources assignedResources =
+              new ResourceMappings.AssignedResources();
+          assignedResources.updateAssignedResources(allocatedDevices);
+          container.getResourceMappings().addAssignedResources(GPU_URI,
+              assignedResources);
+
+          // Update state store.
+          nmContext.getNMStateStore().storeAssignedResources(containerId,
+              GPU_URI, allocatedDevices);
+        } catch (IOException e) {
+          cleanupAssignGpus(containerId);
+          throw new ResourceHandlerException(e);
+        }
+      }
+
+      return new GpuAllocation(assignedGpus,
+          Sets.difference(allowedGpuDevices, assignedGpus));
+    }
+    return new GpuAllocation(null, allowedGpuDevices);
+  }
+
+  /**
+   * Clean up all Gpus assigned to containerId
+   * @param containerId containerId
+   */
+  public synchronized void cleanupAssignGpus(ContainerId containerId) {
+    Iterator<Map.Entry<Integer, ContainerId>> iter =
+        usedDevices.entrySet().iterator();
+    while (iter.hasNext()) {
+      if (iter.next().getValue().equals(containerId)) {
+        iter.remove();
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public synchronized Map<Integer, ContainerId> getDeviceAllocationMapping() {
+     return new HashMap<>(usedDevices);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
new file mode 100644
index 0000000..7144bb2
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class GpuResourceHandlerImpl implements ResourceHandler {
+  final static Log LOG = LogFactory
+      .getLog(GpuResourceHandlerImpl.class);
+
+  // This will be used by container-executor to add necessary clis
+  public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus";
+  public static final String CONTAINER_ID_CLI_OPTION = "--container_id";
+
+  private GpuResourceAllocator gpuAllocator;
+  private CGroupsHandler cGroupsHandler;
+  private PrivilegedOperationExecutor privilegedOperationExecutor;
+
+  public GpuResourceHandlerImpl(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor) {
+    this.cGroupsHandler = cGroupsHandler;
+    this.privilegedOperationExecutor = privilegedOperationExecutor;
+    gpuAllocator = new GpuResourceAllocator(nmContext);
+  }
+
+  @Override
+  public List<PrivilegedOperation> bootstrap(Configuration configuration)
+      throws ResourceHandlerException {
+    List<Integer> minorNumbersOfUsableGpus;
+    try {
+      minorNumbersOfUsableGpus = GpuDiscoverer.getInstance()
+          .getMinorNumbersOfGpusUsableByYarn();
+    } catch (YarnException e) {
+      LOG.error("Exception when trying to get usable GPU device", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    for (int minorNumber : minorNumbersOfUsableGpus) {
+      gpuAllocator.addGpu(minorNumber);
+    }
+
+    // And initialize cgroups
+    this.cGroupsHandler.initializeCGroupController(
+        CGroupsHandler.CGroupController.DEVICES);
+
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> preStart(Container container)
+      throws ResourceHandlerException {
+    String containerIdStr = container.getContainerId().toString();
+
+    // Assign Gpus to container if requested some.
+    GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus(
+        container);
+
+    // Create device cgroups for the container
+    cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerIdStr);
+    try {
+      // Execute c-e to setup GPU isolation before launch the container
+      PrivilegedOperation privilegedOperation = new PrivilegedOperation(
+          PrivilegedOperation.OperationType.GPU, Arrays
+          .asList(CONTAINER_ID_CLI_OPTION, containerIdStr));
+      if (!allocation.getDeniedGPUs().isEmpty()) {
+        privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION,
+            StringUtils.join(",", allocation.getDeniedGPUs())));
+      }
+
+      privilegedOperationExecutor.executePrivilegedOperation(
+          privilegedOperation, true);
+    } catch (PrivilegedOperationException e) {
+      cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+          containerIdStr);
+      LOG.warn("Could not update cgroup for container", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    List<PrivilegedOperation> ret = new ArrayList<>();
+    ret.add(new PrivilegedOperation(
+        PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
+        PrivilegedOperation.CGROUP_ARG_PREFIX
+            + cGroupsHandler.getPathForCGroupTasks(
+            CGroupsHandler.CGroupController.DEVICES, containerIdStr)));
+
+    return ret;
+  }
+
+  @VisibleForTesting
+  public GpuResourceAllocator getGpuAllocator() {
+    return gpuAllocator;
+  }
+
+  @Override
+  public List<PrivilegedOperation> reacquireContainer(ContainerId containerId)
+      throws ResourceHandlerException {
+    gpuAllocator.recoverAssignedGpus(containerId);
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> postComplete(
+      ContainerId containerId) throws ResourceHandlerException {
+    gpuAllocator.cleanupAssignGpus(containerId);
+    cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerId.toString());
+    return null;
+  }
+
+  @Override
+  public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
new file mode 100644
index 0000000..88f77ed
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * Plugins to handle resources on a node. This will be used by
+ * {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}
+ */
+public abstract class NodeResourceUpdaterPlugin {
+  /**
+   * Update configured resource for the given component.
+   * @param res resource passed in by external mododule (such as
+   *            {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}
+   * @throws YarnException when any issue happens.
+   */
+  public abstract void updateConfiguredResource(Resource res)
+      throws YarnException;
+
+  /**
+   * This method will be called when the node's resource is loaded from
+   * dynamic-resources.xml in ResourceManager.
+   *
+   * @param newResource newResource reported by RM
+   * @throws YarnException when any mismatch between NM/RM
+   */
+  public void handleUpdatedResourceFromRM(Resource newResource) throws
+      YarnException {
+    // by default do nothing, subclass should implement this method when any
+    // special activities required upon new resource reported by RM.
+  }
+
+  // TODO: add implementation to update node attribute once YARN-3409 merged.
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
new file mode 100644
index 0000000..6e134b3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+
+/**
+ * {@link ResourcePlugin} is an interface for node manager to easier support
+ * discovery/manage/isolation for new resource types.
+ *
+ * <p>
+ * It has two major part: {@link ResourcePlugin#createResourceHandler(Context,
+ * CGroupsHandler, PrivilegedOperationExecutor)} and
+ * {@link ResourcePlugin#getNodeResourceHandlerInstance()}, see javadocs below
+ * for more details.
+ * </p>
+ */
+public interface ResourcePlugin {
+  /**
+   * Initialize the plugin, this will be invoked during NM startup.
+   * @param context NM Context
+   * @throws YarnException when any issue occurs
+   */
+  void initialize(Context context) throws YarnException;
+
+  /**
+   * Plugin needs to return {@link ResourceHandler} when any special isolation
+   * required for the resource type. This will be added to
+   * {@link ResourceHandlerChain} during NodeManager startup. When no special
+   * isolation need, return null.
+   *
+   * @param nmContext NodeManager context.
+   * @param cGroupsHandler CGroupsHandler
+   * @param privilegedOperationExecutor Privileged Operation Executor.
+   * @return ResourceHandler
+   */
+  ResourceHandler createResourceHandler(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor);
+
+  /**
+   * Plugin needs to return {@link NodeResourceUpdaterPlugin} when any discovery
+   * mechanism required for the resource type. For example, if we want to set
+   * resource-value during NM registration or send update during NM-RM heartbeat
+   * We can implement a {@link NodeResourceUpdaterPlugin} and update fields of
+   * {@link org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest}
+   * or {@link org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest}
+   *
+   * This will be invoked during every node status update or node registration,
+   * please avoid creating new instance every time.
+   *
+   * @return NodeResourceUpdaterPlugin, could be null when no discovery needed.
+   */
+  NodeResourceUpdaterPlugin getNodeResourceHandlerInstance();
+
+  /**
+   * Do cleanup of the plugin, this will be invoked when
+   * {@link org.apache.hadoop.yarn.server.nodemanager.NodeManager} stops
+   * @throws YarnException if any issue occurs
+   */
+  void cleanup() throws YarnException;
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/15319c79/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
new file mode 100644
index 0000000..73d6038
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Manages {@link ResourcePlugin} configured on this NodeManager.
+ */
+public class ResourcePluginManager {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ResourcePluginManager.class);
+  private static final Set<String> SUPPORTED_RESOURCE_PLUGINS = ImmutableSet.of(
+      GPU_URI);
+
+  private Map<String, ResourcePlugin> configuredPlugins = Collections.EMPTY_MAP;
+
+  public synchronized void initialize(Context context)
+      throws YarnException {
+    Configuration conf = context.getConf();
+    String[] plugins = conf.getStrings(YarnConfiguration.NM_RESOURCE_PLUGINS);
+
+    if (plugins != null) {
+      Map<String, ResourcePlugin> pluginMap = new HashMap<>();
+
+      // Initialize each plugins
+      for (String resourceName : plugins) {
+        resourceName = resourceName.trim();
+        if (!SUPPORTED_RESOURCE_PLUGINS.contains(resourceName)) {
+          String msg =
+              "Trying to initialize resource plugin with name=" + resourceName
+                  + ", it is not supported, list of supported plugins:"
+                  + StringUtils.join(",",
+                  SUPPORTED_RESOURCE_PLUGINS);
+          LOG.error(msg);
+          throw new YarnException(msg);
+        }
+
+        if (pluginMap.containsKey(resourceName)) {
+          // Duplicated items, ignore ...
+          continue;
+        }
+
+        ResourcePlugin plugin = null;
+        if (resourceName.equals(GPU_URI)) {
+          plugin = new GpuResourcePlugin();
+        }
+
+        if (plugin == null) {
+          throw new YarnException(
+              "This shouldn't happen, plugin=" + resourceName
+                  + " should be loaded and initialized");
+        }
+        plugin.initialize(context);
+        pluginMap.put(resourceName, plugin);
+      }
+
+      configuredPlugins = Collections.unmodifiableMap(pluginMap);
+    }
+  }
+
+  public synchronized void cleanup() throws YarnException {
+    for (ResourcePlugin plugin : configuredPlugins.values()) {
+      plugin.cleanup();
+    }
+  }
+
+  /**
+   * Get resource name (such as gpu/fpga) to plugin references.
+   * @return read-only map of resource name to plugins.
+   */
+  public synchronized Map<String, ResourcePlugin> getNameToPlugins() {
+    return configuredPlugins;
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message