hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ste...@apache.org
Subject [2/2] hadoop git commit: HADOOP-15384. distcp numListstatusThreads option doesn't get to -delete scan. Contributed by Steve Loughran.
Date Tue, 10 Jul 2018 09:51:12 GMT
HADOOP-15384. distcp numListstatusThreads option doesn't get to -delete scan.
Contributed by Steve Loughran.

(cherry picked from commit ca8b80bf59c0570bb9172208d3a6c993a6854514)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d54241e9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d54241e9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d54241e9

Branch: refs/heads/branch-3.1
Commit: d54241e9c995c8f63e6a6317599b858f486763ce
Parents: 8ad82ea
Author: Steve Loughran <stevel@apache.org>
Authored: Tue Jul 10 10:50:40 2018 +0100
Committer: Steve Loughran <stevel@apache.org>
Committed: Tue Jul 10 10:50:40 2018 +0100

----------------------------------------------------------------------
 .../java/org/apache/hadoop/tools/DistCpOptions.java    |  5 ++++-
 .../org/apache/hadoop/tools/mapred/CopyCommitter.java  | 13 +++++++++++--
 .../tools/contract/AbstractContractDistCpTest.java     |  2 +-
 3 files changed, 16 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d54241e9/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
index ea99016..cff04eb 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
@@ -387,7 +387,10 @@ public final class DistCpOptions {
       DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.TRACK_MISSING,
           String.valueOf(trackPath));
     }
-
+    if (numListstatusThreads > 0) {
+      DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.NUM_LISTSTATUS_THREADS,
+          Integer.toString(numListstatusThreads));
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d54241e9/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 07eacb0..38106fa 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -392,6 +392,9 @@ public class CopyCommitter extends FileOutputCommitter {
     Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
     FileSystem clusterFS = sourceListing.getFileSystem(conf);
     Path sortedSourceListing = DistCpUtils.sortListing(conf, sourceListing);
+    long sourceListingCompleted = System.currentTimeMillis();
+    LOG.info("Source listing completed in {}",
+        formatDuration(sourceListingCompleted - listingStart));
 
     // Similarly, create the listing of target-files. Sort alphabetically.
     Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
@@ -409,8 +412,8 @@ public class CopyCommitter extends FileOutputCommitter {
     // Walk both source and target file listings.
     // Delete all from target that doesn't also exist on source.
     long deletionStart = System.currentTimeMillis();
-    LOG.info("Listing completed in {}",
-        formatDuration(deletionStart - listingStart));
+    LOG.info("Destination listing completed in {}",
+        formatDuration(deletionStart - sourceListingCompleted));
 
     long deletedEntries = 0;
     long filesDeleted = 0;
@@ -545,9 +548,15 @@ public class CopyCommitter extends FileOutputCommitter {
     // Set up options to be the same from the CopyListing.buildListing's
     // perspective, so to collect similar listings as when doing the copy
     //
+    // thread count is picked up from the job
+    int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS,
+        DistCpConstants.DEFAULT_LISTSTATUS_THREADS);
+    LOG.info("Scanning destination directory {} with thread count: {}",
+        targetFinalPath, threads);
     DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath)
         .withOverwrite(overwrite)
         .withSyncFolder(syncFolder)
+        .withNumListstatusThreads(threads)
         .build();
     DistCpContext distCpContext = new DistCpContext(options);
     distCpContext.setTargetPathExists(targetPathExists);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d54241e9/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
index a5e0a03..1458991 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
@@ -572,7 +572,7 @@ public abstract class AbstractContractDistCpTest
   private DistCpOptions buildWithStandardOptions(
       DistCpOptions.Builder builder) {
     return builder
-        .withNumListstatusThreads(8)
+        .withNumListstatusThreads(DistCpOptions.MAX_NUM_LISTSTATUS_THREADS)
         .build();
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message