spark-commits mailing list archives

From sro...@apache.org
Subject spark git commit: [SPARK-15263][CORE] Make shuffle service dir cleanup faster by using `rm -rf`
Date Wed, 18 May 2016 11:10:11 GMT
Repository: spark
Updated Branches:
  refs/heads/master 420b70069 -> c1fd9cacb


[SPARK-15263][CORE] Make shuffle service dir cleanup faster by using `rm -rf`

## What changes were proposed in this pull request?

Jira: https://issues.apache.org/jira/browse/SPARK-15263

The current logic for directory cleanup is slow because it lists each directory, recurses
into child directories, checks for symbolic links, deletes leaf files, and finally deletes
each directory once it is empty. Every one of those steps switches back and forth between
user space and kernel space. Since most deployment backends are Unix systems, we can simply
shell out to `rm -rf` so that the entire deletion runs in a single native process (a sketch
of the recursive Java IO pattern follows below).
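
For context, here is a minimal sketch of the recursive Java IO pattern described above (simplified for illustration; the real `JavaUtils` code also skips symlinked directories and aggregates per-child exceptions):

```java
import java.io.File;
import java.io.IOException;

class RecursiveDeleteSketch {
  // Every isDirectory()/listFiles()/delete() call is a separate syscall
  // round trip from the JVM, repeated once per entry in the tree.
  static void deleteRecursively(File file) throws IOException {
    if (file.isDirectory()) {
      File[] children = file.listFiles();  // null on IO error
      if (children != null) {
        for (File child : children) {
          deleteRecursively(child);
        }
      }
    }
    if (!file.delete() && file.exists()) {
      throw new IOException("Failed to delete: " + file.getAbsolutePath());
    }
  }
}
```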

The current Java-based implementation in Spark is similar to what standard libraries like Guava
and Commons IO do (e.g. http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/FileUtils.java?view=markup#l1540).
However, Guava removed this method in favour of shelling out to an operating system command
(as this PR does). See the `Deprecated` note in the older Guava javadocs for details: http://google.github.io/guava/releases/10.0.1/api/docs/com/google/common/io/Files.html#deleteRecursively(java.io.File)

Ideally, Java would provide such an API so that users wouldn't have to write platform-specific
code like this. It is also not just about speed: correctly handling race conditions during
filesystem deletion is tricky. I found this related Java bug (a sketch of the race follows
below): http://bugs.java.com/bugdatabase/view_bug.do?bug_id=7148952
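
To make the race concrete (illustration only, not code from this patch): a directory listing is a point-in-time snapshot, so deletes based on it can fail spuriously if another process touches the tree concurrently.

```java
import java.io.File;

class DeletionRaceSketch {
  // Between listFiles() and the per-child deletes, another process may
  // add or remove entries, so none of these calls can be trusted blindly.
  static boolean deleteShallow(File dir) {
    File[] children = dir.listFiles();  // snapshot of the directory
    if (children != null) {
      for (File child : children) {
        child.delete();                 // false if the child is already gone
      }
    }
    return dir.delete();                // false if new entries appeared meanwhile
  }
}
```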

## How was this patch tested?

I am relying on existing test cases to exercise the method. Suggestions for additional
testing are welcome.

## Performance gains

*Input setup*: Created a nested directory structure of depth 3, with 50 sub-dirs at each
level (50^3 = 125,000 leaf dirs). The input being cleaned up had ~125k dirs in total.

Ran both approaches (in isolation) 6 times each and averaged the results:

Native Java cleanup  | `rm -rf` as a separate process
------------ | -------------
10.04 sec | 4.11 sec

This change made deletion 2.4 times faster for the given test input.
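
The benchmark code is not part of this commit, but a rough harness matching the setup described above might look like this (hypothetical sketch; the class name and `delete-bench` directory are made up):

```java
import java.io.File;
import java.io.IOException;

class DeleteBenchSketch {
  // Depth 3 with 50 sub-dirs per level: 50^3 = 125,000 leaf directories.
  static void buildTree(File root, int depth, int fanout) throws IOException {
    for (int i = 0; i < fanout; i++) {
      File child = new File(root, "d" + i);
      if (!child.mkdirs() && !child.isDirectory()) {
        throw new IOException("mkdirs failed: " + child);
      }
      if (depth > 1) {
        buildTree(child, depth - 1, fanout);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    File root = new File(System.getProperty("java.io.tmpdir"), "delete-bench");
    buildTree(root, 3, 50);

    long start = System.nanoTime();
    // Swap in the approach under test: the recursive Java IO version,
    // or the native `rm -rf` one used here.
    new ProcessBuilder("rm", "-rf", root.getAbsolutePath())
        .inheritIO().start().waitFor();
    System.out.printf("deletion took %.2f sec%n", (System.nanoTime() - start) / 1e9);
  }
}
```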

Author: Tejas Patil <tejasp@fb.com>

Closes #13042 from tejasapatil/delete_recursive.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1fd9cac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1fd9cac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1fd9cac

Branch: refs/heads/master
Commit: c1fd9cacba8e602cc55a893f0beb05ea48c3a1f6
Parents: 420b700
Author: Tejas Patil <tejasp@fb.com>
Authored: Wed May 18 12:10:32 2016 +0100
Committer: Sean Owen <sowen@cloudera.com>
Committed: Wed May 18 12:10:32 2016 +0100

----------------------------------------------------------------------
 common/network-common/pom.xml                   |  4 ++
 .../apache/spark/network/util/JavaUtils.java    | 49 +++++++++++++++++++-
 .../network/shuffle/TestShuffleDataContext.java | 24 ++++------
 3 files changed, 61 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c1fd9cac/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 5444ae6..12d8927 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -41,6 +41,10 @@
       <groupId>io.netty</groupId>
       <artifactId>netty-all</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+    </dependency>
 
     <!-- Provided dependencies -->
     <dependency>

http://git-wip-us.apache.org/repos/asf/spark/blob/c1fd9cac/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
----------------------------------------------------------------------
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
index fbed2f0..eb534ee 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
 import io.netty.buffer.Unpooled;
+import org.apache.commons.lang3.SystemUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,14 +80,32 @@ public class JavaUtils {
     return Unpooled.wrappedBuffer(b).toString(StandardCharsets.UTF_8);
   }
 
-  /*
+  /**
    * Delete a file or directory and its contents recursively.
    * Don't follow directories if they are symlinks.
-   * Throws an exception if deletion is unsuccessful.
+   *
+   * @param file Input file / dir to be deleted
+   * @throws IOException if deletion is unsuccessful
    */
   public static void deleteRecursively(File file) throws IOException {
     if (file == null) { return; }
 
+    // On Unix systems, use operating system command to run faster
+    // If that does not work out, fallback to the Java IO way
+    if (SystemUtils.IS_OS_UNIX) {
+      try {
+        deleteRecursivelyUsingUnixNative(file);
+        return;
+      } catch (IOException e) {
+        logger.warn("Attempt to delete using native Unix OS command failed for path = {}.
" +
+                        "Falling back to Java IO way", file.getAbsolutePath(), e);
+      }
+    }
+
+    deleteRecursivelyUsingJavaIO(file);
+  }
+
+  private static void deleteRecursivelyUsingJavaIO(File file) throws IOException {
     if (file.isDirectory() && !isSymlink(file)) {
       IOException savedIOException = null;
       for (File child : listFilesSafely(file)) {
@@ -109,6 +128,32 @@ public class JavaUtils {
     }
   }
 
+  private static void deleteRecursivelyUsingUnixNative(File file) throws IOException {
+    ProcessBuilder builder = new ProcessBuilder("rm", "-rf", file.getAbsolutePath());
+    Process process = null;
+    int exitCode = -1;
+
+    try {
+      // In order to avoid deadlocks, consume the stdout (and stderr) of the process
+      builder.redirectErrorStream(true);
+      builder.redirectOutput(new File("/dev/null"));
+
+      process = builder.start();
+
+      exitCode = process.waitFor();
+    } catch (Exception e) {
+      throw new IOException("Failed to delete: " + file.getAbsolutePath(), e);
+    } finally {
+      if (process != null && process.isAlive()) {
+        process.destroy();
+      }
+    }
+
+    if (exitCode != 0 || file.exists()) {
+      throw new IOException("Failed to delete: " + file.getAbsolutePath());
+    }
+  }
+
   private static File[] listFilesSafely(File file) throws IOException {
     if (file.exists()) {
       File[] files = file.listFiles();

http://git-wip-us.apache.org/repos/asf/spark/blob/c1fd9cac/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
----------------------------------------------------------------------
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
index 62a1fb4..81e0194 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
@@ -27,12 +27,17 @@ import com.google.common.io.Closeables;
 import com.google.common.io.Files;
 
 import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
+import org.apache.spark.network.util.JavaUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Manages some sort-shuffle data, including the creation
  * and cleanup of directories that can be read by the {@link ExternalShuffleBlockResolver}.
  */
 public class TestShuffleDataContext {
+  private static final Logger logger = LoggerFactory.getLogger(TestShuffleDataContext.class);
+
   public final String[] localDirs;
   public final int subDirsPerLocalDir;
 
@@ -53,7 +58,11 @@ public class TestShuffleDataContext {
 
   public void cleanup() {
     for (String localDir : localDirs) {
-      deleteRecursively(new File(localDir));
+      try {
+        JavaUtils.deleteRecursively(new File(localDir));
+      } catch (IOException e) {
+        logger.warn("Unable to cleanup localDir = " + localDir, e);
+      }
     }
   }
 
@@ -92,17 +101,4 @@ public class TestShuffleDataContext {
   public ExecutorShuffleInfo createExecutorInfo(String shuffleManager) {
     return new ExecutorShuffleInfo(localDirs, subDirsPerLocalDir, shuffleManager);
   }
-
-  private static void deleteRecursively(File f) {
-    assert f != null;
-    if (f.isDirectory()) {
-      File[] children = f.listFiles();
-      if (children != null) {
-        for (File child : children) {
-          deleteRecursively(child);
-        }
-      }
-    }
-    f.delete();
-  }
 }

