hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject git commit: HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by Zhe Zhang.
Date Fri, 29 Aug 2014 21:47:45 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 5889f4d5f -> 075bb9e38


HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by
Zhe Zhang.

(cherry picked from commit ea1c6f31c2d2ea5b38ed57e2aa241d122103a721)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/075bb9e3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/075bb9e3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/075bb9e3

Branch: refs/heads/branch-2
Commit: 075bb9e380994a267ecba15b7e410f147dcfc35a
Parents: 5889f4d
Author: Andrew Wang <wang@apache.org>
Authored: Fri Aug 29 14:44:37 2014 -0700
Committer: Andrew Wang <wang@apache.org>
Committed: Fri Aug 29 14:47:28 2014 -0700

----------------------------------------------------------------------
 .../src/site/markdown/HadoopArchives.md.vm      | 12 ++++--
 .../org/apache/hadoop/tools/HadoopArchives.java | 12 +++++-
 .../apache/hadoop/tools/TestHadoopArchives.java | 41 ++++++++++++++++++++
 3 files changed, 59 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/075bb9e3/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm
index 0cc0f1c..db0a25f 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm
@@ -38,7 +38,7 @@ Overview
 How to Create an Archive
 ------------------------
 
-  `Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>`
+  `Usage: hadoop archive -archiveName name -p <parent> [-r <replication factor>] <src>* <dest>`
 
   -archiveName is the name of the archive you would like to create. An example
   would be foo.har. The name should have a \*.har extension. The parent argument
@@ -52,9 +52,12 @@ How to Create an Archive
   would need a map reduce cluster to run this. For a detailed example the later
   sections.
 
+  -r indicates the desired replication factor; if this optional argument is
+  not specified, a replication factor of 10 will be used.
+
   If you just want to archive a single directory /foo/bar then you can just use
 
-  `hadoop archive -archiveName zoo.har -p /foo/bar /outputdir`
+  `hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir`
 
 How to Look Up Files in Archives
 --------------------------------
@@ -90,14 +93,15 @@ Archives Examples
 
 $H3 Creating an Archive
 
-  `hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo`
+  `hadoop archive -archiveName foo.har -p /user/hadoop -r 3 dir1 dir2 /user/zoo`
 
   The above example is creating an archive using /user/hadoop as the relative
   archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
   will be archived in the following file system directory -- /user/zoo/foo.har.
   Archiving does not delete the input files. If you want to delete the input
   files after creating the archives (to reduce namespace), you will have to do
-  it on your own. 
+  it on your own. In this example, because `-r 3` is specified, a replication
+  factor of 3 will be used.
 
 $H3 Looking Up Files
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/075bb9e3/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
index 93994b8..e53576d 100644
--- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
@@ -97,9 +97,12 @@ public class HadoopArchives implements Tool {
   long partSize = 2 * 1024 * 1024 * 1024l;
   /** size of blocks in hadoop archives **/
   long blockSize = 512 * 1024 * 1024l;
+  /** the desired replication degree; default is 10 **/
+  short repl = 10;
 
   private static final String usage = "archive"
-  + " -archiveName NAME -p <parent path> <src>* <dest>" +
+  + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+      "<src>* <dest>" +
   "\n";
   
  
@@ -542,7 +545,7 @@ public class HadoopArchives implements Tool {
       srcWriter.close();
     }
     //increase the replication of src files
-    jobfs.setReplication(srcFiles, (short) 10);
+    jobfs.setReplication(srcFiles, repl);
     conf.setInt(SRC_COUNT_LABEL, numFiles);
     conf.setLong(TOTAL_SIZE_LABEL, totalSize);
     int numMaps = (int)(totalSize/partSize);
@@ -835,6 +838,11 @@ public class HadoopArchives implements Tool {
       }
 
       i+=2;
+
+      if ("-r".equals(args[i])) {
+        repl = Short.parseShort(args[i+1]);
+        i+=2;
+      }
       //read the rest of the paths
       for (; i < args.length; i++) {
         if (i == (args.length - 1)) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/075bb9e3/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
index 65bbbe4..e7eef3f 100644
--- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
@@ -157,6 +157,24 @@ public class TestHadoopArchives {
     final List<String> harPaths = lsr(shell, fullHarPathStr);
     Assert.assertEquals(originalPaths, harPaths);
   }
+
+  @Test
+  public void testRelativePathWitRepl() throws Exception {
+    // create dir1 under inputPath and add a file to it via createFile
+    final Path dir = new Path(inputPath, "dir1");
+    fs.mkdirs(dir);
+    createFile(inputPath, fs, dir.getName(), "a");
+
+    // capture the listing lsr returns for the source tree before archiving
+    final FsShell fsShell = new FsShell(conf);
+    final List<String> expected = lsr(fsShell, "input");
+    System.out.println("originalPaths: " + expected);
+
+    // build the archive via makeArchiveWithRepl(), then list the resulting har
+    final String harUri = makeArchiveWithRepl();
+    final List<String> actual = lsr(fsShell, harUri);
+    Assert.assertEquals(expected, actual);
+  }
   
 @Test
   public void testPathWithSpaces() throws Exception {
@@ -625,6 +643,29 @@ public class TestHadoopArchives {
     assertEquals(0, ToolRunner.run(har, args));
     return fullHarPathStr;
   }
+
+  /*
+   * Run the HadoopArchives tool with "-r 3" to archive "*" under inputPath
+   * into foo.har below archivePath; returns the har:// URI of the archive.
+   */
+  private String makeArchiveWithRepl() throws Exception {
+    final String inputPathStr = inputPath.toUri().getPath();
+    System.out.println("inputPathStr = " + inputPathStr);
+
+    final URI uri = fs.getUri();
+    final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
+        + archivePath.toUri().getPath() + Path.SEPARATOR;
+    // "-r" and its value must be separate argv tokens for the option to parse
+    final String harName = "foo.har";
+    final String fullHarPathStr = prefix + harName;
+    final String[] args = { "-archiveName", harName, "-p", inputPathStr,
+        "-r", "3", "*", archivePath.toString() };
+    System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
+        HADOOP_ARCHIVES_JAR);
+    final HadoopArchives har = new HadoopArchives(conf);
+    assertEquals(0, ToolRunner.run(har, args));
+    return fullHarPathStr;
+  }
   
   @Test
   /*


Mime
View raw message