From common-commits-return-95242-archive-asf-public=cust-asf.ponee.io@hadoop.apache.org Fri Jul 12 14:54:57 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id EF04B1802C7 for ; Fri, 12 Jul 2019 16:54:56 +0200 (CEST) Received: (qmail 52661 invoked by uid 500); 12 Jul 2019 14:54:52 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 52652 invoked by uid 99); 12 Jul 2019 14:54:52 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 12 Jul 2019 14:54:52 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id B153A85E10; Fri, 12 Jul 2019 14:54:47 +0000 (UTC) Date: Fri, 12 Jul 2019 14:54:47 +0000 To: "common-commits@hadoop.apache.org" Subject: [hadoop] branch branch-3.2 updated: YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <156294328747.26934.4912339906619676249@gitbox.apache.org> From: snemeth@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: hadoop X-Git-Refname: refs/heads/branch-3.2 X-Git-Reftype: branch X-Git-Oldrev: 9ed2c22d579a032db3c31bd7b6bb676b30c09af5 X-Git-Newrev: c61c9696689399e339c0d4a45e588d9f39f8d819 X-Git-Rev: c61c9696689399e339c0d4a45e588d9f39f8d819 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. snemeth pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new c61c969 YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal c61c969 is described below commit c61c9696689399e339c0d4a45e588d9f39f8d819 Author: Szilard Nemeth AuthorDate: Fri Jul 12 16:51:58 2019 +0200 YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal (cherry picked from commit c416284bb7581747beef36d7899d8680fe33abbd) --- .../resourceplugin/gpu/GpuResourcePlugin.java | 22 +++++++++ .../resourceplugin/gpu/TestGpuResourcePlugin.java | 54 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java index 393d76e..1ac6f83 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; @@ -33,8 +34,14 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class GpuResourcePlugin implements ResourcePlugin { + + private static final Logger LOG = + LoggerFactory.getLogger(GpuResourcePlugin.class); + private final GpuNodeResourceUpdateHandler resourceDiscoverHandler; private final GpuDiscoverer gpuDiscoverer; private GpuResourceHandlerImpl gpuResourceHandler = null; @@ -84,6 +91,10 @@ public class GpuResourcePlugin implements ResourcePlugin { public synchronized NMResourceInfo getNMResourceInfo() throws YarnException { GpuDeviceInformation gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation(); + + //At this point the gpu plugin is already enabled + checkGpuResourceHandler(); + GpuResourceAllocator gpuResourceAllocator = gpuResourceHandler.getGpuAllocator(); List totalGpus = gpuResourceAllocator.getAllowedGpusCopy(); @@ -94,6 +105,17 @@ public class GpuResourcePlugin implements ResourcePlugin { assignedGpuDevices); } + private void checkGpuResourceHandler() throws YarnException { + if(gpuResourceHandler == null) { + String errorMsg = + "Linux Container Executor is not configured for the NodeManager. " + + "To fully enable GPU feature on the node also set " + + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly."; + LOG.warn(errorMsg); + throw new YarnException(errorMsg); + } + } + @Override public String toString() { return GpuResourcePlugin.class.getName(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java new file mode 100644 index 0000000..888f899 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; + +import static org.mockito.Mockito.mock; + +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.Test; + +public class TestGpuResourcePlugin { + + @Test(expected = YarnException.class) + public void testResourceHandlerNotInitialized() throws YarnException { + GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class); + GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler = + mock(GpuNodeResourceUpdateHandler.class); + + GpuResourcePlugin target = + new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer); + + target.getNMResourceInfo(); + } + + @Test + public void testResourceHandlerIsInitialized() throws YarnException { + GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class); + GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler = + mock(GpuNodeResourceUpdateHandler.class); + + GpuResourcePlugin target = + new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer); + + target.createResourceHandler(null, null, null); + + //Not throwing any exception + target.getNMResourceInfo(); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org