hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aajis...@apache.org
Subject hadoop git commit: HADOOP-12657. Add an option to skip newline on empty files with getMerge -nl. Contributed by Kanaka Kumar Avvaru.
Date Fri, 18 Dec 2015 05:01:18 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 e06c29124 -> af4982349


HADOOP-12657. Add an option to skip newline on empty files with getMerge -nl. Contributed by
Kanaka Kumar Avvaru.

(cherry picked from commit 061c05cc05ff6257b14c5c4f25cbcec2d184cda7)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/af498234
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/af498234
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/af498234

Branch: refs/heads/branch-2
Commit: af498234997d68b718abed2be7638819e93f2266
Parents: e06c291
Author: Akira Ajisaka <aajisaka@apache.org>
Authored: Fri Dec 18 13:58:28 2015 +0900
Committer: Akira Ajisaka <aajisaka@apache.org>
Committed: Fri Dec 18 14:00:07 2015 +0900

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 ++
 .../apache/hadoop/fs/shell/CopyCommands.java    | 40 ++++++++++++--------
 .../src/site/markdown/FileSystemShell.md        |  1 +
 .../org/apache/hadoop/fs/TestFsShellCopy.java   | 24 ++++++++++--
 .../src/test/resources/testConf.xml             |  7 +++-
 5 files changed, 55 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/af498234/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 27bae66..a245286 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -55,6 +55,9 @@ Release 2.8.0 - UNRELEASED
 
     HADOOP-12366. expose calculated paths (aw)
 
+    HADOOP-12657. Add a option to skip newline on empty files with getMerge -nl.
+    (Kanaka Kumar Avvaru via aajisaka)
+
   IMPROVEMENTS
 
     HADOOP-12458. Retries is typoed to spell Retires in parts of

http://git-wip-us.apache.org/repos/asf/hadoop/blob/af498234/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
index c4e42c9..e2fad75 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
@@ -53,24 +53,29 @@ class CopyCommands {
   /** merge multiple files together */
   public static class Merge extends FsCommand {
     public static final String NAME = "getmerge";    
-    public static final String USAGE = "[-nl] <src> <localdst>";
+    public static final String USAGE = "[-nl] [-skip-empty-file] "
+        + "<src> <localdst>";
     public static final String DESCRIPTION =
-      "Get all the files in the directories that " +
-      "match the source file pattern and merge and sort them to only " +
-      "one file on local fs. <src> is kept.\n" +
-      "-nl: Add a newline character at the end of each file.";
+        "Get all the files in the directories that "
+        + "match the source file pattern and merge and sort them to only "
+        + "one file on local fs. <src> is kept.\n"
+        + "-nl: Add a newline character at the end of each file.\n"
+        + "-skip-empty-file: Do not add new line character for empty file.";
 
     protected PathData dst = null;
     protected String delimiter = null;
+    private boolean skipEmptyFileDelimiter;
     protected List<PathData> srcs = null;
 
     @Override
     protected void processOptions(LinkedList<String> args) throws IOException {
       try {
-        CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl");
+        CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl",
+            "skip-empty-file");
         cf.parse(args);
 
         delimiter = cf.getOpt("nl") ? "\n" : null;
+        skipEmptyFileDelimiter = cf.getOpt("skip-empty-file");
 
         dst = new PathData(new URI(args.removeLast()), getConf());
         if (dst.exists && dst.stat.isDirectory()) {
@@ -92,21 +97,26 @@ class CopyCommands {
       FSDataOutputStream out = dst.fs.create(dst.path);
       try {
         for (PathData src : srcs) {
-          FSDataInputStream in = src.fs.open(src.path);
-          try {
-            IOUtils.copyBytes(in, out, getConf(), false);
-            if (delimiter != null) {
-              out.write(delimiter.getBytes("UTF-8"));
+          if (src.stat.getLen() != 0) {
+            try (FSDataInputStream in = src.fs.open(src.path)) {
+              IOUtils.copyBytes(in, out, getConf(), false);
+              writeDelimiter(out);
             }
-          } finally {
-            in.close();
+          } else if (!skipEmptyFileDelimiter) {
+            writeDelimiter(out);
           }
         }
       } finally {
         out.close();
-      }      
+      }
     }
- 
+
+    private void writeDelimiter(FSDataOutputStream out) throws IOException {
+      if (delimiter != null) {
+        out.write(delimiter.getBytes("UTF-8"));
+      }
+    }
+
     @Override
     protected void processNonexistentPath(PathData item) throws IOException {
       exitCode = 1; // flag that a path is bad

http://git-wip-us.apache.org/repos/asf/hadoop/blob/af498234/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
index 5aefd49..7efd676 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
@@ -375,6 +375,7 @@ getmerge
 Usage: `hadoop fs -getmerge [-nl] <src> <localdst>`
 
 Takes a source directory and a destination file as input and concatenates files in src into the destination local file. Optionally -nl can be set to enable adding a newline character (LF) at the end of each file.
+-skip-empty-file can be used to avoid unwanted newline characters in case of empty files.
 
 Examples:
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/af498234/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
index 1d626f9..6b5de74 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
@@ -318,6 +318,7 @@ public class TestFsShellCopy {
     Path f1 = new Path(root, "f1");
     Path f2 = new Path(root, "f2");
     Path f3 = new Path(root, "f3");
+    Path empty = new Path(root, "empty");
     Path fnf = new Path(root, "fnf");
     Path d = new Path(root, "dir");
     Path df1 = new Path(d, "df1");
@@ -325,7 +326,8 @@ public class TestFsShellCopy {
     Path df3 = new Path(d, "df3");
     
     createFile(f1, f2, f3, df1, df2, df3);
-    
+    createEmptyFile(empty);
+
     int exit;
     // one file, kind of silly
     exit = shell.run(new String[]{
@@ -366,6 +368,13 @@ public class TestFsShellCopy {
     assertEquals(0, exit);
     assertEquals("f1\nf2\n", readFile("out"));
 
+    exit = shell.run(new String[]{
+        "-getmerge", "-nl", "-skip-empty-file",
+        f1.toString(), f2.toString(), empty.toString(),
+    "out" });
+    assertEquals(0, exit);
+    assertEquals("f1\nf2\n", readFile("out"));
+
     // glob three files
     shell.run(new String[]{
         "-getmerge", "-nl",
@@ -374,13 +383,13 @@ public class TestFsShellCopy {
     assertEquals(0, exit);
     assertEquals("f1\nf2\nf3\n", readFile("out"));
 
-    // directory with 3 files, should skip subdir
+    // directory with 1 empty + 3 non empty files, should skip subdir
     shell.run(new String[]{
         "-getmerge", "-nl",
         root.toString(),
         "out" });
     assertEquals(0, exit);
-    assertEquals("f1\nf2\nf3\n", readFile("out"));
+    assertEquals("\nf1\nf2\nf3\n", readFile("out"));
 
     // subdir
     shell.run(new String[]{
@@ -538,7 +547,14 @@ public class TestFsShellCopy {
       out.close();
     }
   }
-  
+
+  private void createEmptyFile(Path ... paths) throws IOException {
+    for (Path path : paths) {
+      FSDataOutputStream out = lfs.create(path);
+      out.close();
+    }
+  }
+
   private String readFile(String out) throws IOException {
     Path path = new Path(out);
     FileStatus stat = lfs.getFileStatus(path);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/af498234/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
index f92d436..693ba98 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
@@ -601,7 +601,7 @@
       <comparators>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^-getmerge \[-nl\] &lt;src&gt; &lt;localdst&gt; :\s*</expected-output>
+          <expected-output>^-getmerge \[-nl\] \[-skip-empty-file\] &lt;src&gt; &lt;localdst&gt; :\s*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
@@ -615,6 +615,11 @@
           <type>RegexpComparator</type>
           <expected-output>^( |\t)*-nl\s+Add a newline character at the end of each file.( )*</expected-output>
         </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^( |\t)*-skip-empty-file\s+Do not add new line character for empty file.( )*</expected-output>
+        </comparator>
+
       </comparators>
     </test>
 


Mime
View raw message