From: vinayakumarb@apache.org
To: common-commits@hadoop.apache.org
Date: Wed, 22 Jul 2015 04:57:52 -0000
Message-Id: <070dd75ff58e408f8d78a9df3f38a172@git.apache.org>
Subject: [1/2] hadoop git commit: HADOOP-12017. Hadoop archives command should use configurable replication factor when closing (Contributed by Bibin A Chundatt)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2 53ed25c3d -> 03d68b557
  refs/heads/trunk   31f117138 -> 94c6a4aa8


HADOOP-12017. Hadoop archives command should use configurable replication factor when closing (Contributed by Bibin A Chundatt)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/94c6a4aa
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/94c6a4aa
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/94c6a4aa

Branch: refs/heads/trunk
Commit: 94c6a4aa85e7d98e9b532b330f30783315f4334b
Parents: 31f1171
Author: Vinayakumar B <vinayakumarb@apache.org>
Authored: Wed Jul 22 10:25:49 2015 +0530
Committer: Vinayakumar B <vinayakumarb@apache.org>
Committed: Wed Jul 22 10:25:49 2015 +0530

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 +++
 .../org/apache/hadoop/tools/HadoopArchives.java | 21 ++++++++++------
 .../src/site/markdown/HadoopArchives.md.vm      |  2 +-
 .../apache/hadoop/tools/TestHadoopArchives.java | 26 ++++++++++++--------
 4 files changed, 33 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/94c6a4aa/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 5b51bce..3d101d4 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -992,6 +992,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12051. ProtobufRpcEngine.invoke() should use Exception.toString()
     over getMessage() in logging/span events. (Varun Saxena via stevel)

+    HADOOP-12017. Hadoop archives command should use configurable replication
+    factor when closing (Bibin A Chundatt via vinayakumarb)
+
 Release 2.7.2 - UNRELEASED

   INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/94c6a4aa/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
index 330830b..ee14850 100644
--- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
@@ -100,15 +100,17 @@ public class HadoopArchives implements Tool {
   static final String SRC_PARENT_LABEL = NAME + ".parent.path";
   /** the size of the blocks that will be created when archiving **/
   static final String HAR_BLOCKSIZE_LABEL = NAME + ".block.size";
-  /**the size of the part files that will be created when archiving **/
+  /** the replication factor for the file in archiving. **/
+  static final String HAR_REPLICATION_LABEL = NAME + ".replication.factor";
+  /** the size of the part files that will be created when archiving **/
   static final String HAR_PARTSIZE_LABEL = NAME + ".partfile.size";

   /** size of each part file size **/
   long partSize = 2 * 1024 * 1024 * 1024l;
   /** size of blocks in hadoop archives **/
   long blockSize = 512 * 1024 * 1024l;
-  /** the desired replication degree; default is 10 **/
-  short repl = 10;
+  /** the desired replication degree; default is 3 **/
+  short repl = 3;

   private static final String usage = "archive"
   + " <-archiveName <NAME>.har> <-p <parent path>> [-r <replication factor>]" +
@@ -475,6 +477,7 @@ public class HadoopArchives implements Tool {
     conf.setLong(HAR_PARTSIZE_LABEL, partSize);
     conf.set(DST_HAR_LABEL, archiveName);
     conf.set(SRC_PARENT_LABEL, parentPath.makeQualified(fs).toString());
+    conf.setInt(HAR_REPLICATION_LABEL, repl);
     Path outputPath = new Path(dest, archiveName);
     FileOutputFormat.setOutputPath(conf, outputPath);
     FileSystem outFs = outputPath.getFileSystem(conf);
@@ -549,8 +552,6 @@ public class HadoopArchives implements Tool {
     } finally {
       srcWriter.close();
     }
-    //increase the replication of src files
-    jobfs.setReplication(srcFiles, repl);
     conf.setInt(SRC_COUNT_LABEL, numFiles);
     conf.setLong(TOTAL_SIZE_LABEL, totalSize);
     int numMaps = (int)(totalSize/partSize);
@@ -587,6 +588,7 @@ public class HadoopArchives implements Tool {
     FileSystem destFs = null;
     byte[] buffer;
     int buf_size = 128 * 1024;
+    private int replication = 3;
     long blockSize = 512 * 1024 * 1024l;

     // configure the mapper and create
@@ -595,7 +597,7 @@ public class HadoopArchives implements Tool {
     // tmp files.
     public void configure(JobConf conf) {
       this.conf = conf;
-
+      replication = conf.getInt(HAR_REPLICATION_LABEL, 3);
       // this is tightly tied to map reduce
       // since it does not expose an api
       // to get the partition
@@ -712,6 +714,7 @@ public class HadoopArchives implements Tool {
     public void close() throws IOException {
       // close the part files.
       partStream.close();
+      destFs.setReplication(tmpOutput, (short) replication);
     }
   }

@@ -732,6 +735,7 @@ public class HadoopArchives implements Tool {
     private int numIndexes = 1000;
     private Path tmpOutputDir = null;
     private int written = 0;
+    private int replication = 3;
     private int keyVal = 0;

     // configure
@@ -740,6 +744,7 @@ public class HadoopArchives implements Tool {
       tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
       masterIndex = new Path(tmpOutputDir, "_masterindex");
       index = new Path(tmpOutputDir, "_index");
+      replication = conf.getInt(HAR_REPLICATION_LABEL, 3);
       try {
         fs = masterIndex.getFileSystem(conf);
         if (fs.exists(masterIndex)) {
@@ -798,8 +803,8 @@ public class HadoopArchives implements Tool {
       outStream.close();
       indexStream.close();
       // try increasing the replication
-      fs.setReplication(index, (short) 5);
-      fs.setReplication(masterIndex, (short) 5);
+      fs.setReplication(index, (short) replication);
+      fs.setReplication(masterIndex, (short) replication);
     }
   }
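
The pattern the patch introduces is the same in the mapper and the reducer above: read the factor from the job configuration in configure(), write the part or index file, and apply the factor with FileSystem.setReplication() once the stream is closed. The standalone sketch below shows that pattern in isolation; it is not part of the patch, and it assumes the HAR_REPLICATION_LABEL key (NAME + ".replication.factor") resolves to "har.replication.factor".

----------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HarReplicationSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Same default (3) as the patch; the key string assumes
    // HAR_REPLICATION_LABEL resolves to "har.replication.factor".
    short replication = (short) conf.getInt("har.replication.factor", 3);

    // A finished part file, e.g. part-0 inside the .har directory;
    // the path passed in here is illustrative.
    Path partFile = new Path(args[0]);
    FileSystem fs = partFile.getFileSystem(conf);
    // Mirrors the mapper's close(): set the replication only after
    // the output stream has been closed.
    fs.setReplication(partFile, replication);
  }
}
----------------------------------------------------------------------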
http://git-wip-us.apache.org/repos/asf/hadoop/blob/94c6a4aa/hadoop-tools/hadoop-archives/src/site/markdown/HadoopArchives.md.vm
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/site/markdown/HadoopArchives.md.vm b/hadoop-tools/hadoop-archives/src/site/markdown/HadoopArchives.md.vm
index be557a7..8bbb1ea 100644
--- a/hadoop-tools/hadoop-archives/src/site/markdown/HadoopArchives.md.vm
+++ b/hadoop-tools/hadoop-archives/src/site/markdown/HadoopArchives.md.vm
@@ -53,7 +53,7 @@ How to Create an Archive
   sections.

   -r indicates the desired replication factor; if this optional argument is
-  not specified, a replication factor of 10 will be used.
+  not specified, a replication factor of 3 will be used.

  If you just want to archive a single directory /foo/bar then you can just use
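
The invocation the documentation describes can also be driven programmatically, which is how the test below exercises it. A minimal sketch follows; the paths are made up, and only the -r handling comes from this patch:

----------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class CreateArchiveWithReplication {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // All paths are illustrative; "-r", "2" requests replication factor 2
    // for the archive's part and index files, overriding the default of 3.
    String[] harArgs = { "-archiveName", "foo.har", "-p", "/user/hadoop",
        "-r", "2", "dir1", "dir2", "/user/hadoop/archives" };
    System.exit(ToolRunner.run(new HadoopArchives(conf), harArgs));
  }
}
----------------------------------------------------------------------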
http://git-wip-us.apache.org/repos/asf/hadoop/blob/94c6a4aa/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
index d8222dc..165c515 100644
--- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.tools;
 import java.io.ByteArrayOutputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
-import java.io.OutputStream;
 import java.io.PrintStream;
 import java.net.URI;
 import java.util.ArrayList;
@@ -39,7 +38,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.HarFileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.JarFinder;
@@ -110,13 +111,9 @@ public class TestHadoopArchives {
     conf.set(CapacitySchedulerConfiguration.PREFIX
         + CapacitySchedulerConfiguration.ROOT + ".default."
         + CapacitySchedulerConfiguration.CAPACITY, "100");
-    dfscluster = new MiniDFSCluster
-        .Builder(conf)
-        .checkExitOnShutdown(true)
-        .numDataNodes(2)
-        .format(true)
-        .racks(null)
-        .build();
+    dfscluster =
+        new MiniDFSCluster.Builder(conf).checkExitOnShutdown(true)
+            .numDataNodes(3).format(true).racks(null).build();

     fs = dfscluster.getFileSystem();

@@ -753,12 +750,21 @@ public class TestHadoopArchives {
     final String harName = "foo.har";
     final String fullHarPathStr = prefix + harName;
-    final String[] args = { "-archiveName", harName, "-p", inputPathStr, "-r",
-        "3", "*", archivePath.toString() };
+    final String[] args =
+        { "-archiveName", harName, "-p", inputPathStr, "-r", "2", "*",
+            archivePath.toString() };
     System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
         HADOOP_ARCHIVES_JAR);
     final HadoopArchives har = new HadoopArchives(conf);
     assertEquals(0, ToolRunner.run(har, args));
+    RemoteIterator<LocatedFileStatus> listFiles =
+        fs.listFiles(new Path(archivePath.toString() + "/" + harName), false);
+    while (listFiles.hasNext()) {
+      LocatedFileStatus next = listFiles.next();
+      if (!next.getPath().toString().endsWith("_SUCCESS")) {
+        assertEquals(next.getPath().toString(), 2, next.getReplication());
+      }
+    }
     return fullHarPathStr;
   }
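
Outside of the MiniDFSCluster harness, the same assertion can be spot-checked against any archive with a listing along the lines of the sketch below. The archive path is an assumption, and _SUCCESS is skipped for the same reason as in the test: it is written by the MapReduce framework, not by the archive writers.

----------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class CheckHarReplication {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Illustrative: path to a created archive,
    // e.g. /user/hadoop/archives/foo.har
    Path har = new Path(args[0]);
    FileSystem fs = har.getFileSystem(conf);
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(har, false);
    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      // Skip the framework-written job marker, as the test does.
      if (!status.getPath().getName().equals("_SUCCESS")) {
        System.out.println(status.getPath() + " replication="
            + status.getReplication());
      }
    }
  }
}
----------------------------------------------------------------------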