Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 9271F17500 for ; Wed, 25 Feb 2015 17:24:22 +0000 (UTC) Received: (qmail 74377 invoked by uid 500); 25 Feb 2015 17:23:47 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 74220 invoked by uid 500); 25 Feb 2015 17:23:47 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 70298 invoked by uid 99); 25 Feb 2015 17:23:45 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 25 Feb 2015 17:23:45 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0B8BEE0D39; Wed, 25 Feb 2015 17:23:45 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zjshen@apache.org To: common-commits@hadoop.apache.org Date: Wed, 25 Feb 2015 17:24:24 -0000 Message-Id: <55bf3976958e427480b86be59843769b@git.apache.org> In-Reply-To: <388f87de9df8467ab380d8ba4e915671@git.apache.org> References: <388f87de9df8467ab380d8ba4e915671@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [41/50] [abbrv] hadoop git commit: YARN-2980. Move health check script related functionality to hadoop-common (Varun Saxena via aw) YARN-2980. 
Move health check script related functionality to hadoop-common (Varun Saxena via aw) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d4ac6822 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d4ac6822 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d4ac6822 Branch: refs/heads/YARN-2928 Commit: d4ac6822e1c5dfac504ced48f10ab57a55b49e93 Parents: 73bcfa9 Author: Allen Wittenauer Authored: Tue Feb 24 11:25:26 2015 -0800 Committer: Allen Wittenauer Committed: Tue Feb 24 11:25:26 2015 -0800 ---------------------------------------------------------------------- .../hadoop/util/NodeHealthScriptRunner.java | 345 ++++++++++++++++++ .../hadoop/util/TestNodeHealthScriptRunner.java | 136 +++++++ hadoop-yarn-project/CHANGES.txt | 3 + .../nodemanager/NodeHealthCheckerService.java | 12 +- .../nodemanager/NodeHealthScriptRunner.java | 356 ------------------- .../yarn/server/nodemanager/NodeManager.java | 26 +- .../yarn/server/nodemanager/TestEventFlow.java | 7 +- .../nodemanager/TestNodeHealthService.java | 86 ++--- .../BaseContainerManagerTest.java | 7 +- .../webapp/TestContainerLogsPage.java | 13 +- .../nodemanager/webapp/TestNMWebServer.java | 13 +- .../nodemanager/webapp/TestNMWebServices.java | 7 +- .../webapp/TestNMWebServicesApps.java | 6 +- .../webapp/TestNMWebServicesContainers.java | 5 +- 14 files changed, 587 insertions(+), 435 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java new file mode 100644 index 
0000000..568ad5b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java @@ -0,0 +1,345 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Timer; +import java.util.TimerTask; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.util.Shell.ExitCodeException; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.StringUtils; + +/** + * + * The class which provides functionality of checking the health of the node + * using the configured node health script and reporting back to the service + * for which the health checker has been asked to report. 
+ */ +public class NodeHealthScriptRunner extends AbstractService { + + private static Log LOG = LogFactory.getLog(NodeHealthScriptRunner.class); + + /** Absolute path to the health script. */ + private String nodeHealthScript; + /** Delay after which node health script to be executed */ + private long intervalTime; + /** Time after which the script should be timedout */ + private long scriptTimeout; + /** Timer used to schedule node health monitoring script execution */ + private Timer nodeHealthScriptScheduler; + + /** ShellCommandExecutor used to execute monitoring script */ + ShellCommandExecutor shexec = null; + + /** Pattern used for searching in the output of the node health script */ + static private final String ERROR_PATTERN = "ERROR"; + + /** Time out error message */ + public static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out"; + + private boolean isHealthy; + + private String healthReport; + + private long lastReportedTime; + + private TimerTask timer; + + private enum HealthCheckerExitStatus { + SUCCESS, + TIMED_OUT, + FAILED_WITH_EXIT_CODE, + FAILED_WITH_EXCEPTION, + FAILED + } + + + /** + * Class which is used by the {@link Timer} class to periodically execute the + * node health script. 
+ * + */ + private class NodeHealthMonitorExecutor extends TimerTask { + + String exceptionStackTrace = ""; + + public NodeHealthMonitorExecutor(String[] args) { + ArrayList execScript = new ArrayList(); + execScript.add(nodeHealthScript); + if (args != null) { + execScript.addAll(Arrays.asList(args)); + } + shexec = new ShellCommandExecutor(execScript + .toArray(new String[execScript.size()]), null, null, scriptTimeout); + } + + @Override + public void run() { + HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; + try { + shexec.execute(); + } catch (ExitCodeException e) { + // ignore the exit code of the script + status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; + // On Windows, we will not hit the Stream closed IOException + // thrown by stdout buffered reader for timeout event. + if (Shell.WINDOWS && shexec.isTimedOut()) { + status = HealthCheckerExitStatus.TIMED_OUT; + } + } catch (Exception e) { + LOG.warn("Caught exception : " + e.getMessage()); + if (!shexec.isTimedOut()) { + status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; + } else { + status = HealthCheckerExitStatus.TIMED_OUT; + } + exceptionStackTrace = StringUtils.stringifyException(e); + } finally { + if (status == HealthCheckerExitStatus.SUCCESS) { + if (hasErrors(shexec.getOutput())) { + status = HealthCheckerExitStatus.FAILED; + } + } + reportHealthStatus(status); + } + } + + /** + * Method which is used to parse output from the node health monitor and + * send to the report address. + * + * The timed out script or script which causes IOException output is + * ignored. + * + * The node is marked unhealthy if + *
<ol>
+ * <li>The node health script times out</li>
+ * <li>The node health scripts output has a line which begins with ERROR</li>
+ * <li>An exception is thrown while executing the script</li>
+ * </ol>
+ * If the script throws {@link IOException} or {@link ExitCodeException} the + * output is ignored and node is left remaining healthy, as script might + * have syntax error. + * + * @param status + */ + void reportHealthStatus(HealthCheckerExitStatus status) { + long now = System.currentTimeMillis(); + switch (status) { + case SUCCESS: + setHealthStatus(true, "", now); + break; + case TIMED_OUT: + setHealthStatus(false, NODE_HEALTH_SCRIPT_TIMED_OUT_MSG); + break; + case FAILED_WITH_EXCEPTION: + setHealthStatus(false, exceptionStackTrace); + break; + case FAILED_WITH_EXIT_CODE: + setHealthStatus(true, "", now); + break; + case FAILED: + setHealthStatus(false, shexec.getOutput()); + break; + } + } + + /** + * Method to check if the output string has line which begins with ERROR. + * + * @param output + * string + * @return true if output string has error pattern in it. + */ + private boolean hasErrors(String output) { + String[] splits = output.split("\n"); + for (String split : splits) { + if (split.startsWith(ERROR_PATTERN)) { + return true; + } + } + return false; + } + } + + public NodeHealthScriptRunner(String scriptName, long chkInterval, long timeout, + String[] scriptArgs) { + super(NodeHealthScriptRunner.class.getName()); + this.lastReportedTime = System.currentTimeMillis(); + this.isHealthy = true; + this.healthReport = ""; + this.nodeHealthScript = scriptName; + this.intervalTime = chkInterval; + this.scriptTimeout = timeout; + this.timer = new NodeHealthMonitorExecutor(scriptArgs); + } + + /* + * Method which initializes the values for the script path and interval time. + */ + @Override + protected void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf); + } + + /** + * Method used to start the Node health monitoring. + * + */ + @Override + protected void serviceStart() throws Exception { + // if health script path is not configured don't start the thread. 
+ if (!shouldRun(nodeHealthScript)) { + LOG.info("Not starting node health monitor"); + return; + } + nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true); + // Start the timer task immediately and + // then periodically at interval time. + nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime); + super.serviceStart(); + } + + /** + * Method used to terminate the node health monitoring service. + * + */ + @Override + protected void serviceStop() { + if (!shouldRun(nodeHealthScript)) { + return; + } + if (nodeHealthScriptScheduler != null) { + nodeHealthScriptScheduler.cancel(); + } + if (shexec != null) { + Process p = shexec.getProcess(); + if (p != null) { + p.destroy(); + } + } + } + + /** + * Gets the if the node is healthy or not + * + * @return true if node is healthy + */ + public boolean isHealthy() { + return isHealthy; + } + + /** + * Sets if the node is healhty or not considering disks' health also. + * + * @param isHealthy + * if or not node is healthy + */ + private synchronized void setHealthy(boolean isHealthy) { + this.isHealthy = isHealthy; + } + + /** + * Returns output from health script. if node is healthy then an empty string + * is returned. + * + * @return output from health script + */ + public String getHealthReport() { + return healthReport; + } + + /** + * Sets the health report from the node health script. Also set the disks' + * health info obtained from DiskHealthCheckerService. + * + * @param healthReport + */ + private synchronized void setHealthReport(String healthReport) { + this.healthReport = healthReport; + } + + /** + * Returns time stamp when node health script was last run. + * + * @return timestamp when node health script was last run + */ + public long getLastReportedTime() { + return lastReportedTime; + } + + /** + * Sets the last run time of the node health script. 
+ * + * @param lastReportedTime + */ + private synchronized void setLastReportedTime(long lastReportedTime) { + this.lastReportedTime = lastReportedTime; + } + + /** + * Method used to determine if or not node health monitoring service should be + * started or not. Returns true if following conditions are met: + * + *
<ol>
+ * <li>Path to Node health check script is not empty</li>
+ * <li>Node health check script file exists</li>
+ * </ol>
+ * + * @return true if node health monitoring service can be started. + */ + public static boolean shouldRun(String healthScript) { + if (healthScript == null || healthScript.trim().isEmpty()) { + return false; + } + File f = new File(healthScript); + return f.exists() && FileUtil.canExecute(f); + } + + private synchronized void setHealthStatus(boolean isHealthy, String output) { + LOG.info("health status being set as " + output); + this.setHealthy(isHealthy); + this.setHealthReport(output); + } + + private synchronized void setHealthStatus(boolean isHealthy, String output, + long time) { + LOG.info("health status being set as " + output); + this.setHealthStatus(isHealthy, output); + this.setLastReportedTime(time); + } + + /** + * Used only by tests to access the timer task directly + * @return the timer task + */ + public TimerTask getTimerTask() { + return timer; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java new file mode 100644 index 0000000..8fc64d1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.TimerTask; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestNodeHealthScriptRunner { + + protected static File testRootDir = new File("target", + TestNodeHealthScriptRunner.class.getName() + + "-localDir").getAbsoluteFile(); + + private File nodeHealthscriptFile = new File(testRootDir, + Shell.appendScriptExtension("failingscript")); + + @Before + public void setup() { + testRootDir.mkdirs(); + } + + @After + public void tearDown() throws Exception { + if (testRootDir.exists()) { + FileContext.getLocalFSFileContext().delete( + new Path(testRootDir.getAbsolutePath()), true); + } + } + + private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable) + throws IOException { + PrintWriter pw = null; + try { + FileUtil.setWritable(nodeHealthscriptFile, true); + FileUtil.setReadable(nodeHealthscriptFile, true); + pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile)); + pw.println(scriptStr); + pw.flush(); + } finally { + pw.close(); + } + FileUtil.setExecutable(nodeHealthscriptFile, setExecutable); + } + + @Test + public void testNodeHealthScriptShouldRun() throws IOException { + Assert.assertFalse("Node health script should 
start", + NodeHealthScriptRunner.shouldRun( + nodeHealthscriptFile.getAbsolutePath())); + writeNodeHealthScriptFile("", false); + // Node health script should not start if the node health script is not + // executable. + Assert.assertFalse("Node health script should start", + NodeHealthScriptRunner.shouldRun( + nodeHealthscriptFile.getAbsolutePath())); + writeNodeHealthScriptFile("", true); + Assert.assertTrue("Node health script should start", + NodeHealthScriptRunner.shouldRun( + nodeHealthscriptFile.getAbsolutePath())); + } + + @Test + public void testNodeHealthScript() throws Exception { + String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; + String normalScript = "echo \"I am all fine\""; + String timeOutScript = + Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" + : "sleep 4\necho \"I am fine\""; + Configuration conf = new Configuration(); + writeNodeHealthScriptFile(normalScript, true); + NodeHealthScriptRunner nodeHealthScriptRunner = new NodeHealthScriptRunner( + nodeHealthscriptFile.getAbsolutePath(), + 500, 1000, new String[] {}); + nodeHealthScriptRunner.init(conf); + TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); + + timerTask.run(); + // Normal Script runs successfully + Assert.assertTrue("Node health status reported unhealthy", + nodeHealthScriptRunner.isHealthy()); + Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport()); + + // Error script. + writeNodeHealthScriptFile(errorScript, true); + // Run timer + timerTask.run(); + Assert.assertFalse("Node health status reported healthy", + nodeHealthScriptRunner.isHealthy()); + Assert.assertTrue( + nodeHealthScriptRunner.getHealthReport().contains("ERROR")); + + // Healthy script. + writeNodeHealthScriptFile(normalScript, true); + timerTask.run(); + Assert.assertTrue("Node health status reported unhealthy", + nodeHealthScriptRunner.isHealthy()); + Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport()); + + // Timeout script. 
+ writeNodeHealthScriptFile(timeOutScript, true); + timerTask.run(); + Assert.assertFalse("Node health status reported healthy even after timeout", + nodeHealthScriptRunner.isHealthy()); + Assert.assertEquals( + NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG, + nodeHealthScriptRunner.getHealthReport()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 36bc707..39e2dc0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -17,6 +17,9 @@ Trunk - Unreleased YARN-2796. deprecate sbin/yarn-daemon.sh (aw) + YARN-2980. Move health check script related functionality to hadoop-common + (Varun Saxena via aw) + OPTIMIZATIONS BUG FIXES http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java index 6d6001a..02b318a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java @@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.NodeHealthScriptRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; /** * The class which provides functionality of checking the health of the node and @@ -33,15 +35,17 @@ public class NodeHealthCheckerService extends CompositeService { static final String SEPARATOR = ";"; - public NodeHealthCheckerService() { + public NodeHealthCheckerService(NodeHealthScriptRunner scriptRunner, + LocalDirsHandlerService dirHandlerService) { super(NodeHealthCheckerService.class.getName()); - dirsHandler = new LocalDirsHandlerService(); + nodeHealthScriptRunner = scriptRunner; + dirsHandler = dirHandlerService; } @Override protected void serviceInit(Configuration conf) throws Exception { - if (NodeHealthScriptRunner.shouldRun(conf)) { - nodeHealthScriptRunner = new NodeHealthScriptRunner(); + if (NodeHealthScriptRunner.shouldRun( + conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH))) { addService(nodeHealthScriptRunner); } addService(dirsHandler); http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java deleted file mode 100644 index e3c9862..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java +++ /dev/null @@ -1,356 +0,0 @@ -/** - * Licensed to 
the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.server.nodemanager; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Timer; -import java.util.TimerTask; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.service.AbstractService; -import org.apache.hadoop.util.Shell.ExitCodeException; -import org.apache.hadoop.util.Shell.ShellCommandExecutor; -import org.apache.hadoop.util.Shell; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.conf.YarnConfiguration; - -/** - * - * The class which provides functionality of checking the health of the node - * using the configured node health script and reporting back to the service - * for which the health checker has been asked to report. - */ -public class NodeHealthScriptRunner extends AbstractService { - - private static Log LOG = LogFactory.getLog(NodeHealthScriptRunner.class); - - /** Absolute path to the health script. 
*/ - private String nodeHealthScript; - /** Delay after which node health script to be executed */ - private long intervalTime; - /** Time after which the script should be timedout */ - private long scriptTimeout; - /** Timer used to schedule node health monitoring script execution */ - private Timer nodeHealthScriptScheduler; - - /** ShellCommandExecutor used to execute monitoring script */ - ShellCommandExecutor shexec = null; - - /** Configuration used by the checker */ - private Configuration conf; - - /** Pattern used for searching in the output of the node health script */ - static private final String ERROR_PATTERN = "ERROR"; - - /** Time out error message */ - static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out"; - - private boolean isHealthy; - - private String healthReport; - - private long lastReportedTime; - - private TimerTask timer; - - private enum HealthCheckerExitStatus { - SUCCESS, - TIMED_OUT, - FAILED_WITH_EXIT_CODE, - FAILED_WITH_EXCEPTION, - FAILED - } - - - /** - * Class which is used by the {@link Timer} class to periodically execute the - * node health script. - * - */ - private class NodeHealthMonitorExecutor extends TimerTask { - - String exceptionStackTrace = ""; - - public NodeHealthMonitorExecutor(String[] args) { - ArrayList execScript = new ArrayList(); - execScript.add(nodeHealthScript); - if (args != null) { - execScript.addAll(Arrays.asList(args)); - } - shexec = new ShellCommandExecutor(execScript - .toArray(new String[execScript.size()]), null, null, scriptTimeout); - } - - @Override - public void run() { - HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; - try { - shexec.execute(); - } catch (ExitCodeException e) { - // ignore the exit code of the script - status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; - // On Windows, we will not hit the Stream closed IOException - // thrown by stdout buffered reader for timeout event. 
- if (Shell.WINDOWS && shexec.isTimedOut()) { - status = HealthCheckerExitStatus.TIMED_OUT; - } - } catch (Exception e) { - LOG.warn("Caught exception : " + e.getMessage()); - if (!shexec.isTimedOut()) { - status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; - } else { - status = HealthCheckerExitStatus.TIMED_OUT; - } - exceptionStackTrace = StringUtils.stringifyException(e); - } finally { - if (status == HealthCheckerExitStatus.SUCCESS) { - if (hasErrors(shexec.getOutput())) { - status = HealthCheckerExitStatus.FAILED; - } - } - reportHealthStatus(status); - } - } - - /** - * Method which is used to parse output from the node health monitor and - * send to the report address. - * - * The timed out script or script which causes IOException output is - * ignored. - * - * The node is marked unhealthy if - *
<ol>
- * <li>The node health script times out</li>
- * <li>The node health scripts output has a line which begins with ERROR</li>
- * <li>An exception is thrown while executing the script</li>
- * </ol>
- * If the script throws {@link IOException} or {@link ExitCodeException} the - * output is ignored and node is left remaining healthy, as script might - * have syntax error. - * - * @param status - */ - void reportHealthStatus(HealthCheckerExitStatus status) { - long now = System.currentTimeMillis(); - switch (status) { - case SUCCESS: - setHealthStatus(true, "", now); - break; - case TIMED_OUT: - setHealthStatus(false, NODE_HEALTH_SCRIPT_TIMED_OUT_MSG); - break; - case FAILED_WITH_EXCEPTION: - setHealthStatus(false, exceptionStackTrace); - break; - case FAILED_WITH_EXIT_CODE: - setHealthStatus(true, "", now); - break; - case FAILED: - setHealthStatus(false, shexec.getOutput()); - break; - } - } - - /** - * Method to check if the output string has line which begins with ERROR. - * - * @param output - * string - * @return true if output string has error pattern in it. - */ - private boolean hasErrors(String output) { - String[] splits = output.split("\n"); - for (String split : splits) { - if (split.startsWith(ERROR_PATTERN)) { - return true; - } - } - return false; - } - } - - public NodeHealthScriptRunner() { - super(NodeHealthScriptRunner.class.getName()); - this.lastReportedTime = System.currentTimeMillis(); - this.isHealthy = true; - this.healthReport = ""; - } - - /* - * Method which initializes the values for the script path and interval time. 
- */ - @Override - protected void serviceInit(Configuration conf) throws Exception { - this.conf = conf; - this.nodeHealthScript = - conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH); - this.intervalTime = conf.getLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS); - this.scriptTimeout = conf.getLong( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS); - String[] args = conf.getStrings(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, - new String[] {}); - timer = new NodeHealthMonitorExecutor(args); - super.serviceInit(conf); - } - - /** - * Method used to start the Node health monitoring. - * - */ - @Override - protected void serviceStart() throws Exception { - // if health script path is not configured don't start the thread. - if (!shouldRun(conf)) { - LOG.info("Not starting node health monitor"); - return; - } - nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true); - // Start the timer task immediately and - // then periodically at interval time. - nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime); - super.serviceStart(); - } - - /** - * Method used to terminate the node health monitoring service. - * - */ - @Override - protected void serviceStop() { - if (!shouldRun(conf)) { - return; - } - if (nodeHealthScriptScheduler != null) { - nodeHealthScriptScheduler.cancel(); - } - if (shexec != null) { - Process p = shexec.getProcess(); - if (p != null) { - p.destroy(); - } - } - } - - /** - * Gets the if the node is healthy or not - * - * @return true if node is healthy - */ - public boolean isHealthy() { - return isHealthy; - } - - /** - * Sets if the node is healhty or not considering disks' health also. 
- * - * @param isHealthy - * if or not node is healthy - */ - private synchronized void setHealthy(boolean isHealthy) { - this.isHealthy = isHealthy; - } - - /** - * Returns output from health script. if node is healthy then an empty string - * is returned. - * - * @return output from health script - */ - public String getHealthReport() { - return healthReport; - } - - /** - * Sets the health report from the node health script. Also set the disks' - * health info obtained from DiskHealthCheckerService. - * - * @param healthReport - */ - private synchronized void setHealthReport(String healthReport) { - this.healthReport = healthReport; - } - - /** - * Returns time stamp when node health script was last run. - * - * @return timestamp when node health script was last run - */ - public long getLastReportedTime() { - return lastReportedTime; - } - - /** - * Sets the last run time of the node health script. - * - * @param lastReportedTime - */ - private synchronized void setLastReportedTime(long lastReportedTime) { - this.lastReportedTime = lastReportedTime; - } - - /** - * Method used to determine if or not node health monitoring service should be - * started or not. Returns true if following conditions are met: - * - *
<ol> - * <li>Path to Node health check script is not empty</li> - * <li>Node health check script file exists</li> - * </ol>
- * - * @param conf - * @return true if node health monitoring service can be started. - */ - public static boolean shouldRun(Configuration conf) { - String nodeHealthScript = - conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH); - if (nodeHealthScript == null || nodeHealthScript.trim().isEmpty()) { - return false; - } - File f = new File(nodeHealthScript); - return f.exists() && FileUtil.canExecute(f); - } - - private synchronized void setHealthStatus(boolean isHealthy, String output) { - this.setHealthy(isHealthy); - this.setHealthReport(output); - } - - private synchronized void setHealthStatus(boolean isHealthy, String output, - long time) { - this.setHealthStatus(isHealthy, output); - this.setLastReportedTime(time); - } - - /** - * Used only by tests to access the timer task directly - * @return the timer task - */ - TimerTask getTimerTask() { - return timer; - } -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 53cbb11..7584138 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -37,6 +37,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.Credentials; import 
org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; @@ -181,6 +182,25 @@ public class NodeManager extends CompositeService } } + public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) { + String nodeHealthScript = + conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH); + if(!NodeHealthScriptRunner.shouldRun(nodeHealthScript)) { + LOG.info("Abey khali"); + return null; + } + long nmCheckintervalTime = conf.getLong( + YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS); + long scriptTimeout = conf.getLong( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS); + String[] scriptArgs = conf.getStrings( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {}); + return new NodeHealthScriptRunner(nodeHealthScript, + nmCheckintervalTime, scriptTimeout, scriptArgs); + } + @Override protected void serviceInit(Configuration conf) throws Exception { @@ -216,9 +236,11 @@ public class NodeManager extends CompositeService // NodeManager level dispatcher this.dispatcher = new AsyncDispatcher(); - nodeHealthChecker = new NodeHealthCheckerService(); + dirsHandler = new LocalDirsHandlerService(); + nodeHealthChecker = + new NodeHealthCheckerService( + getNodeHealthScriptRunner(conf), dirsHandler); addService(nodeHealthChecker); - dirsHandler = nodeHealthChecker.getDiskHandler(); this.context = createNMContext(containerTokenSecretManager, nmTokenSecretManager, nmStore); http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java 
---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index d2caefe..c878e5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -63,7 +63,7 @@ public class TestEventFlow { private static File remoteLogDir = new File("target", TestEventFlow.class.getName() + "-remoteLogDir").getAbsoluteFile(); private static final long SIMULATED_RM_IDENTIFIER = 1234; - + @Test public void testSuccessfulContainerLaunch() throws InterruptedException, IOException, YarnException { @@ -98,9 +98,10 @@ public class TestEventFlow { DeletionService del = new DeletionService(exec); Dispatcher dispatcher = new AsyncDispatcher(); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); healthChecker.init(conf); - LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); NodeManagerMetrics metrics = NodeManagerMetrics.create(); NodeStatusUpdater nodeStatusUpdater = new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) { 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java index 3542196..2e41dea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; -import java.util.TimerTask; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -40,6 +40,9 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; + public class TestNodeHealthService { private static volatile Log LOG = LogFactory @@ -66,17 +69,7 @@ public class TestNodeHealthService { new Path(testRootDir.getAbsolutePath()), true); } } 
- - private Configuration getConfForNodeHealthScript() { - Configuration conf = new Configuration(); - conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH, - nodeHealthscriptFile.getAbsolutePath()); - conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500); - conf.setLong( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000); - return conf; - } - + private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable) throws IOException { PrintWriter pw = null; @@ -92,28 +85,14 @@ public class TestNodeHealthService { FileUtil.setExecutable(nodeHealthscriptFile, setExecutable); } - @Test - public void testNodeHealthScriptShouldRun() throws IOException { - // Node health script should not start if there is no property called - // node health script path. - Assert.assertFalse("By default Health script should not have started", - NodeHealthScriptRunner.shouldRun(new Configuration())); - Configuration conf = getConfForNodeHealthScript(); - // Node health script should not start if the node health script does not - // exists - Assert.assertFalse("Node health script should start", - NodeHealthScriptRunner.shouldRun(conf)); - // Create script path. - conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); - conf.addResource(nodeHealthConfigFile.getName()); - writeNodeHealthScriptFile("", false); - // Node health script should not start if the node health script is not - // executable. 
- Assert.assertFalse("Node health script should start", - NodeHealthScriptRunner.shouldRun(conf)); - writeNodeHealthScriptFile("", true); - Assert.assertTrue("Node health script should start", - NodeHealthScriptRunner.shouldRun(conf)); + private Configuration getConfForNodeHealthScript() { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH, + nodeHealthscriptFile.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500); + conf.setLong( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000); + return conf; } private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy, @@ -124,27 +103,24 @@ public class TestNodeHealthService { } @Test - public void testNodeHealthScript() throws Exception { + public void testNodeHealthService() throws Exception { RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); NodeHealthStatus healthStatus = factory.newRecordInstance(NodeHealthStatus.class); - String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; - String normalScript = "echo \"I am all fine\""; - String timeOutScript = Shell.WINDOWS ? 
"@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" - : "sleep 4\necho \"I am fine\""; Configuration conf = getConfForNodeHealthScript(); conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName()); + writeNodeHealthScriptFile("", true); - writeNodeHealthScriptFile(normalScript, true); - NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(); - nodeHealthChecker.init(conf); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthScriptRunner nodeHealthScriptRunner = - nodeHealthChecker.getNodeHealthScriptRunner(); - TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); - - timerTask.run(); + spy(NodeManager.getNodeHealthScriptRunner(conf)); + NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService( + nodeHealthScriptRunner, dirsHandler); + nodeHealthChecker.init(conf); + doReturn(true).when(nodeHealthScriptRunner).isHealthy(); + doReturn("").when(nodeHealthScriptRunner).getHealthReport(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); @@ -155,11 +131,7 @@ public class TestNodeHealthService { Assert.assertTrue("Node health status reported unhealthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); - // write out error file. - // Healthy to unhealthy transition - writeNodeHealthScriptFile(errorScript, true); - // Run timer - timerTask.run(); + doReturn(false).when(nodeHealthScriptRunner).isHealthy(); // update health status setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), @@ -169,10 +141,8 @@ public class TestNodeHealthService { .getIsNodeHealthy()); Assert.assertTrue("Node health status reported healthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); - - // Check unhealthy to healthy transitions. 
- writeNodeHealthScriptFile(normalScript, true); - timerTask.run(); + + doReturn(true).when(nodeHealthScriptRunner).isHealthy(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); @@ -184,8 +154,9 @@ public class TestNodeHealthService { .getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Healthy to timeout transition. - writeNodeHealthScriptFile(timeOutScript, true); - timerTask.run(); + doReturn(false).when(nodeHealthScriptRunner).isHealthy(); + doReturn(NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG) + .when(nodeHealthScriptRunner).getHealthReport(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); @@ -198,5 +169,4 @@ public class TestNodeHealthService { + NodeHealthCheckerService.SEPARATOR + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false))); } - } http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 1907e1a..8c0ceeb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -57,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface; import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl; @@ -174,9 +176,10 @@ public abstract class BaseContainerManagerTest { delSrvc.init(conf); exec = createContainerExecutor(); - nodeHealthChecker = new NodeHealthCheckerService(); + dirsHandler = new LocalDirsHandlerService(); + nodeHealthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); nodeHealthChecker.init(conf); - dirsHandler = nodeHealthChecker.getDiskHandler(); containerManager = createContainerManager(delSrvc); ((NMContext)context).setContainerManager(containerManager); nodeStatusUpdater.init(conf); 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index b1d4397..43100b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -74,6 +75,12 @@ import com.google.inject.Module; public class TestContainerLogsPage { + private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) { + NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + return new NodeHealthCheckerService(scriptRunner, dirsHandler); + } + @Test(timeout=30000) public void testContainerLogDirs() throws IOException, YarnException { File 
absLogDir = new File("target", @@ -81,7 +88,7 @@ public class TestContainerLogsPage { String logdirwithFile = absLogDir.toURI().toString(); Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, @@ -116,7 +123,7 @@ public class TestContainerLogsPage { files = ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext); Assert.assertTrue(!(files.get(0).toString().contains("file:"))); } - + @Test(timeout = 10000) public void testContainerLogPageAccess() throws IOException { // SecureIOUtils require Native IO to be enabled. This test will run @@ -137,7 +144,7 @@ public class TestContainerLogsPage { "kerberos"); UserGroupInformation.setConfiguration(conf); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); // Add an application and the corresponding containers http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index 
a7006e0..e1845c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -28,6 +28,7 @@ import java.io.Writer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -77,7 +78,13 @@ public class TestNMWebServer { FileUtil.fullyDelete(testRootDir); FileUtil.fullyDelete(testLogDir); } - + + private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) { + NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + return new NodeHealthCheckerService(scriptRunner, dirsHandler); + } + private int startNMWebAppServer(String webAddr) { Context nmContext = new NodeManager.NMContext(null, null, null, null, null); @@ -106,7 +113,7 @@ public class TestNMWebServer { Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ -169,7 +176,7 @@ public class TestNMWebServer { Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOCAL_DIRS, 
testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 7caad4a..5a89e74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -36,6 +36,7 @@ import org.junit.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -98,14 +99,16 @@ public class TestNMWebServices extends JerseyTestBase { TestNMWebServices.class.getSimpleName() + "LogDir"); private Injector injector = Guice.createInjector(new ServletModule() { + @Override protected void 
configureServlets() { Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); healthChecker.init(conf); - dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager, null); http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index 3e7aac8..e274abb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -33,6 +33,7 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import 
org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -91,11 +92,14 @@ public class TestNMWebServicesApps extends JerseyTestBase { TestNMWebServicesApps.class.getSimpleName() + "LogDir"); private Injector injector = Guice.createInjector(new ServletModule() { + @Override protected void configureServlets() { conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index ceb1d57..3bfd440 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -34,6 +34,7 @@ 
import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -123,7 +124,9 @@ public class TestNMWebServicesContainers extends JerseyTestBase { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf);