hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From snem...@apache.org
Subject [hadoop] branch branch-3.2 updated: YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal
Date Fri, 12 Jul 2019 14:54:47 GMT
This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new c61c969  YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal
c61c969 is described below

commit c61c9696689399e339c0d4a45e588d9f39f8d819
Author: Szilard Nemeth <snemeth@apache.org>
AuthorDate: Fri Jul 12 16:51:58 2019 +0200

    YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal
    
    (cherry picked from commit c416284bb7581747beef36d7899d8680fe33abbd)
---
 .../resourceplugin/gpu/GpuResourcePlugin.java      | 22 +++++++++
 .../resourceplugin/gpu/TestGpuResourcePlugin.java  | 54 ++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index 393d76e..1ac6f83 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
@@ -33,8 +34,14 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
 
 import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class GpuResourcePlugin implements ResourcePlugin {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(GpuResourcePlugin.class);
+
   private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
   private final GpuDiscoverer gpuDiscoverer;
   private GpuResourceHandlerImpl gpuResourceHandler = null;
@@ -84,6 +91,10 @@ public class GpuResourcePlugin implements ResourcePlugin {
   public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
     GpuDeviceInformation gpuDeviceInformation =
         gpuDiscoverer.getGpuDeviceInformation();
+
+    //At this point the gpu plugin is already enabled
+    checkGpuResourceHandler();
+
     GpuResourceAllocator gpuResourceAllocator =
         gpuResourceHandler.getGpuAllocator();
     List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
@@ -94,6 +105,17 @@ public class GpuResourcePlugin implements ResourcePlugin {
         assignedGpuDevices);
   }
 
+  private void checkGpuResourceHandler() throws YarnException {
+    if(gpuResourceHandler == null) {
+      String errorMsg =
+          "Linux Container Executor is not configured for the NodeManager. "
+              + "To fully enable GPU feature on the node also set "
+              + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly.";
+      LOG.warn(errorMsg);
+      throw new YarnException(errorMsg);
+    }
+  }
+
   @Override
   public String toString() {
     return GpuResourcePlugin.class.getName();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
new file mode 100644
index 0000000..888f899
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.junit.Test;
+
+public class TestGpuResourcePlugin {
+
+  @Test(expected = YarnException.class)
+  public void testResourceHandlerNotInitialized() throws YarnException {
+    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.getNMResourceInfo();
+  }
+
+  @Test
+  public void testResourceHandlerIsInitialized() throws YarnException {
+    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.createResourceHandler(null, null, null);
+
+    //Not throwing any exception
+    target.getNMResourceInfo();
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message