hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cmcc...@apache.org
Subject hadoop git commit: HADOOP-11785. Reduce the number of listStatus operation in distcp buildListing (Zoran Dimitrijevic via Colin P. McCabe)
Date Fri, 03 Apr 2015 21:09:01 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 28e0602e7 -> 386b90a70


HADOOP-11785. Reduce the number of listStatus operation in distcp buildListing (Zoran Dimitrijevic
via Colin P. McCabe)

(cherry picked from commit 932730df7d62077f7356464ad27f69469965d77a)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/386b90a7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/386b90a7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/386b90a7

Branch: refs/heads/branch-2
Commit: 386b90a70044e895460de9896288473ac4cc4621
Parents: 28e0602
Author: Colin Patrick Mccabe <cmccabe@cloudera.com>
Authored: Fri Apr 3 14:08:25 2015 -0700
Committer: Colin Patrick Mccabe <cmccabe@cloudera.com>
Committed: Fri Apr 3 14:08:53 2015 -0700

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 ++
 .../apache/hadoop/tools/SimpleCopyListing.java  | 41 +++++++++-----------
 2 files changed, 21 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/386b90a7/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 261ec54..b8a6b8f 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -44,6 +44,9 @@ Release 2.8.0 - UNRELEASED
 
   OPTIMIZATIONS
 
+    HADOOP-11785. Reduce the number of listStatus operation in distcp
+    buildListing (Zoran Dimitrijevic via Colin P. McCabe)
+
   BUG FIXES
 
     HADOOP-11568. Description on usage of classpath in hadoop command is

http://git-wip-us.apache.org/repos/asf/hadoop/blob/386b90a7/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index 6dc827a..e8a23aa 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -193,12 +193,12 @@ public class SimpleCopyListing extends CopyListing {
             writeToFileListing(fileListWriter, sourceCopyListingStatus,
                 sourcePathRoot, options);
 
-            if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
+            if (sourceStatus.isDirectory()) {
               if (LOG.isDebugEnabled()) {
-                LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
+                LOG.debug("Traversing source dir: " + sourceStatus.getPath());
               }
-              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
-                  options);
+              traverseDirectory(fileListWriter, sourceFS, sourceStatus,
+                                sourcePathRoot, options);
             }
           }
         }
@@ -275,22 +275,17 @@ public class SimpleCopyListing extends CopyListing {
             SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
   }
 
-  private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
-                                    FileStatus fileStatus) throws IOException {
-    return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
-  }
-
   private static FileStatus[] getChildren(FileSystem fileSystem,
                                          FileStatus parent) throws IOException {
     return fileSystem.listStatus(parent.getPath());
   }
 
-  private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
-                                         FileStatus sourceStatus,
-                                         Path sourcePathRoot,
-                                         DistCpOptions options)
-                                         throws IOException {
-    FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
+  private void traverseDirectory(SequenceFile.Writer fileListWriter,
+                                 FileSystem sourceFS,
+                                 FileStatus sourceStatus,
+                                 Path sourcePathRoot,
+                                 DistCpOptions options)
+                                 throws IOException {
     final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
     final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
     final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
@@ -299,9 +294,9 @@ public class SimpleCopyListing extends CopyListing {
 
     while (!pathStack.isEmpty()) {
       for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
-        if (LOG.isDebugEnabled())
-          LOG.debug("Recording source-path: "
-                    + sourceStatus.getPath() + " for copy.");
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Recording source-path: " + child.getPath() + " for copy.");
+        }
         CopyListingFileStatus childCopyListingStatus =
           DistCpUtils.toCopyListingFileStatus(sourceFS, child,
             preserveAcls && child.isDirectory(),
@@ -309,16 +304,16 @@ public class SimpleCopyListing extends CopyListing {
             preserveRawXattrs && child.isDirectory());
         writeToFileListing(fileListWriter, childCopyListingStatus,
              sourcePathRoot, options);
-        if (isDirectoryAndNotEmpty(sourceFS, child)) {
-          if (LOG.isDebugEnabled())
-            LOG.debug("Traversing non-empty source dir: "
-                       + sourceStatus.getPath());
+        if (child.isDirectory()) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Traversing into source dir: " + child.getPath());
+          }
           pathStack.push(child);
         }
       }
     }
   }
-  
+
   private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
       CopyListingFileStatus fileStatus, Path sourcePathRoot,
       DistCpOptions options) throws IOException {


Mime
View raw message