spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From van...@apache.org
Subject spark git commit: [SPARK-17742][CORE] Fail launcher app handle if child process exits with error.
Date Fri, 25 Aug 2017 17:04:26 GMT
Repository: spark
Updated Branches:
  refs/heads/master 1813c4a8d -> 628bdeabd


[SPARK-17742][CORE] Fail launcher app handle if child process exits with error.

This is a follow up to cba826d0; that commit set the app handle state
to "LOST" when the child process exited, but that can be ambiguous. This
change sets the state to "FAILED" if the exit code was non-zero and
the handle state wasn't a failure state, or "LOST" if the exit status
was zero.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #19012 from vanzin/SPARK-17742.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/628bdeab
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/628bdeab
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/628bdeab

Branch: refs/heads/master
Commit: 628bdeabda3347d0903c9ac8748d37d7b379d1e6
Parents: 1813c4a
Author: Marcelo Vanzin <vanzin@cloudera.com>
Authored: Fri Aug 25 10:04:21 2017 -0700
Committer: Marcelo Vanzin <vanzin@cloudera.com>
Committed: Fri Aug 25 10:04:21 2017 -0700

----------------------------------------------------------------------
 .../spark/launcher/ChildProcAppHandle.java      | 27 +++++++++++++++-----
 .../spark/launcher/ChildProcAppHandleSuite.java | 21 ++++++++++++++-
 2 files changed, 41 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/628bdeab/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java
index bf91640..5391d4a 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java
@@ -156,9 +156,15 @@ class ChildProcAppHandle implements SparkAppHandle {
    * the exit code.
    */
   void monitorChild() {
-    while (childProc.isAlive()) {
+    Process proc = childProc;
+    if (proc == null) {
+      // Process may have already been disposed of, e.g. by calling kill().
+      return;
+    }
+
+    while (proc.isAlive()) {
       try {
-        childProc.waitFor();
+        proc.waitFor();
       } catch (Exception e) {
         LOG.log(Level.WARNING, "Exception waiting for child process to exit.", e);
       }
@@ -173,15 +179,24 @@ class ChildProcAppHandle implements SparkAppHandle {
 
       int ec;
       try {
-        ec = childProc.exitValue();
+        ec = proc.exitValue();
       } catch (Exception e) {
         LOG.log(Level.WARNING, "Exception getting child process exit code, assuming failure.",
e);
         ec = 1;
       }
 
-      // Only override the success state; leave other fail states alone.
-      if (!state.isFinal() || (ec != 0 && state == State.FINISHED)) {
-        state = State.LOST;
+      State newState = null;
+      if (ec != 0) {
+        // Override state with failure if the current state is not final, or is success.
+        if (!state.isFinal() || state == State.FINISHED) {
+          newState = State.FAILED;
+        }
+      } else if (!state.isFinal()) {
+        newState = State.LOST;
+      }
+
+      if (newState != null) {
+        state = newState;
         fireEvent(false);
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/628bdeab/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java
----------------------------------------------------------------------
diff --git a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java
b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java
index 602f55a..3b4d1b0 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java
@@ -46,7 +46,9 @@ public class ChildProcAppHandleSuite extends BaseSuite {
   private static final List<String> TEST_SCRIPT = Arrays.asList(
     "#!/bin/sh",
     "echo \"output\"",
-    "echo \"error\" 1>&2");
+    "echo \"error\" 1>&2",
+    "while [ -n \"$1\" ]; do EC=$1; shift; done",
+    "exit $EC");
 
   private static File TEST_SCRIPT_PATH;
 
@@ -176,6 +178,7 @@ public class ChildProcAppHandleSuite extends BaseSuite {
 
   @Test
   public void testProcMonitorWithOutputRedirection() throws Exception {
+    assumeFalse(isWindows());
     File err = Files.createTempFile("out", "txt").toFile();
     SparkAppHandle handle = new TestSparkLauncher()
       .redirectError()
@@ -187,6 +190,7 @@ public class ChildProcAppHandleSuite extends BaseSuite {
 
   @Test
   public void testProcMonitorWithLogRedirection() throws Exception {
+    assumeFalse(isWindows());
     SparkAppHandle handle = new TestSparkLauncher()
       .redirectToLog(getClass().getName())
       .startApplication();
@@ -194,6 +198,16 @@ public class ChildProcAppHandleSuite extends BaseSuite {
     assertEquals(SparkAppHandle.State.LOST, handle.getState());
   }
 
+  @Test
+  public void testFailedChildProc() throws Exception {
+    assumeFalse(isWindows());
+    SparkAppHandle handle = new TestSparkLauncher(1)
+      .redirectToLog(getClass().getName())
+      .startApplication();
+    waitFor(handle);
+    assertEquals(SparkAppHandle.State.FAILED, handle.getState());
+  }
+
   private void waitFor(SparkAppHandle handle) throws Exception {
     long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10);
     try {
@@ -212,7 +226,12 @@ public class ChildProcAppHandleSuite extends BaseSuite {
   private static class TestSparkLauncher extends SparkLauncher {
 
     TestSparkLauncher() {
+      this(0);
+    }
+
+    TestSparkLauncher(int ec) {
       setAppResource("outputredirtest");
+      addAppArgs(String.valueOf(ec));
     }
 
     @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message