avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bus...@apache.org
Subject avro git commit: AVRO-1787 Add support of directories & globs to concat and cat. Contributed by Clément MATHIEU.
Date Fri, 27 Jan 2017 16:58:30 GMT
Repository: avro
Updated Branches:
  refs/heads/master d9338a4cf -> f8c70a3a9


AVRO-1787 Add support of directories & globs to concat and cat. Contributed by Clément MATHIEU.

Signed-off-by: Sean Busbey <busbey@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/avro/repo
Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/f8c70a3a
Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/f8c70a3a
Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/f8c70a3a

Branch: refs/heads/master
Commit: f8c70a3a9fe5a410363544493a03dc2d10340cbd
Parents: d9338a4
Author: Clément MATHIEU <clement@unportant.info>
Authored: Tue Jan 24 11:09:03 2017 -0600
Committer: Sean Busbey <busbey@apache.org>
Committed: Fri Jan 27 09:59:21 2017 -0600

----------------------------------------------------------------------
 .../java/org/apache/avro/tool/ConcatTool.java   | 28 ++++++-
 .../main/java/org/apache/avro/tool/Util.java    | 39 ++++++----
 .../java/org/apache/avro/tool/TestCatTool.java  | 14 ++++
 .../org/apache/avro/tool/TestConcatTool.java    | 79 ++++++++++++++++++++
 4 files changed, 143 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/avro/blob/f8c70a3a/lang/java/tools/src/main/java/org/apache/avro/tool/ConcatTool.java
----------------------------------------------------------------------
diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/ConcatTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/ConcatTool.java
index 6026796..e782321 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/ConcatTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/ConcatTool.java
@@ -17,9 +17,11 @@
  */
 package org.apache.avro.tool;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@@ -33,6 +35,7 @@ import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.Path;
 
 /**
  * Tool to concatenate avro files with the same schema and non-reserved
@@ -65,7 +68,7 @@ public class ConcatTool implements Tool {
     Map<String, byte[]> metadata = new TreeMap<String, byte[]>();
     String inputCodec = null;
 
-    for (String inFile : args) {
+    for (String inFile : expandsInputFiles(args)) {
       InputStream input = Util.fileOrStdin(inFile, in);
       DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
         input, new GenericDatumReader<GenericRecord>());
@@ -124,6 +127,24 @@ public class ConcatTool implements Tool {
     return 0;
   }
 
+  /** Processes a list of input files to expand directories if needed. */
+  private static List<String> expandsInputFiles(List<String> args) throws IOException {
+    List<String> files = new ArrayList<String>();
+
+    for (String arg : args) {
+      if (arg.equals("-")) {
+        files.add(arg);
+      } else {
+        List<Path> paths = Util.getFiles(arg);
+        for (Path path : paths) {
+          files.add(path.toString());
+        }
+      }
+    }
+
+    return files;
+  }
+
   private void printHelp(PrintStream out) {
     out.println("concat [input-file...] output-file");
     out.println();
@@ -136,8 +157,9 @@ public class ConcatTool implements Tool {
     out.println("  3 if the codecs don't match");
     out.println("If no input files are given stdin will be used. The tool");
     out.println("0 on success. A dash ('-') can be given as an input file");
-    out.println("to use stdin, and as an output file to use stdout.");
-
+    out.println("to use stdin, and as an output file to use stdout. If a directory");
+    out.println("is given as an input-file all the files within this directory");
+    out.println("are used.");
   }
 
 @Override

http://git-wip-us.apache.org/repos/asf/avro/blob/f8c70a3a/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
----------------------------------------------------------------------
diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java b/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
index 708bb41..9f1cae1 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
@@ -22,6 +22,7 @@ import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC;
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -168,42 +169,52 @@ class Util {
     }
   }
 
-  /**If pathname is a file, this method returns a list with a single absolute Path to that file,
-   * if pathname is a directory, this method returns a list of Pathes to all the files within
-   * this directory.
-   * Only files inside that directory are included, no subdirectories or files in subdirectories
-   * will be added.
+  /**
+   * If pathname is a file, this method returns a list with a single absolute Path to that file.
+   * If pathname is a directory, this method returns a list of Pathes to all the files within
+   * this directory. Only files inside that directory are included, no subdirectories or files
+   * in subdirectories will be added.
+   * If pathname is a glob pattern, all files matching the pattern are included.
+   *
    * The List is sorted alphabetically.
-   * @param fileOrDirName filename or directoryname
+   * @param fileOrDirName filename, directoryname or a glob pattern
    * @return A Path List
    * @throws IOException
    */
-  static List<Path> getFiles(String fileOrDirName)
-    throws IOException {
+  static List<Path> getFiles(String fileOrDirName) throws IOException {
     List<Path> pathList = new ArrayList<Path>();
     Path path = new Path(fileOrDirName);
     FileSystem fs = path.getFileSystem(new Configuration());
 
     if (fs.isFile(path)) {
       pathList.add(path);
-    }
-    else if (fs.getFileStatus(path).isDir()) {
+    } else if (fs.isDirectory(path)) {
       for (FileStatus status : fs.listStatus(path)) {
         if(!status.isDir()) {
           pathList.add(status.getPath());
         }
       }
+    } else {
+      FileStatus[] fileStatuses = fs.globStatus(path);
+      if (fileStatuses != null) {
+        for (FileStatus status : fileStatuses) {
+          pathList.add(status.getPath());
+        }
+      } else {
+        throw new FileNotFoundException(fileOrDirName);
+      }
     }
     Collections.sort(pathList);
     return pathList;
   }
 
   /**
-   * This method returns a list which contains a path to every given file
-   * in the input and a path to every file inside a given directory.
+   * Concatenate the result of {@link #getFiles(String)} applied to all file or directory names.
    * The list is sorted alphabetically and contains no subdirectories or files within those.
-   * @param fileOrDirNames A list of filenames and directorynames
-   * @return A list of Pathes, one for each file
+   *
+   * The list is sorted alphabetically.
+   * @param fileOrDirNames A list of filenames, directorynames or glob patterns
+   * @return A list of Paths, one for each file
    * @throws IOException
    */
   static List<Path> getFiles(List<String> fileOrDirNames)

http://git-wip-us.apache.org/repos/asf/avro/blob/f8c70a3a/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
----------------------------------------------------------------------
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
index 312bd76..39e45de 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
@@ -180,6 +180,20 @@ public class TestCatTool {
       args);
     assertEquals(0, returnCode);
     assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
+
+//    glob input
+    args = asList(
+      new File(input1.getParentFile(), "/*").getAbsolutePath(),
+      output.getAbsolutePath(),
+      "--offset" , String.valueOf(OFFSET),
+      "--limit" , String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS));
+    returnCode = new CatTool().run(
+      System.in,
+      System.out,
+      System.err,
+      args);
+    assertEquals(0, returnCode);
+    assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
   }
 
 

http://git-wip-us.apache.org/repos/asf/avro/blob/f8c70a3a/lang/java/tools/src/test/java/org/apache/avro/tool/TestConcatTool.java
----------------------------------------------------------------------
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestConcatTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestConcatTool.java
index af31ccb..6fdbddf 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestConcatTool.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestConcatTool.java
@@ -25,7 +25,9 @@ import static org.junit.Assert.assertTrue;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.PrintStream;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -108,6 +110,83 @@ public class TestConcatTool {
   }
 
   @Test
+  public void testDirConcat() throws Exception {
+    Map<String, String> metadata = new HashMap<String, String>();
+
+    File dir = AvroTestUtil.tempDirectory(getClass(), "input");
+
+    for (int i = 0; i < 3; i++) {
+      String filename = "input" + i + ".avro";
+      File input = generateData(filename, Type.STRING, metadata, DEFLATE);
+      boolean ok = input.renameTo(new File(dir, input.getName()));
+      assertTrue(ok);
+    }
+
+    File output = AvroTestUtil.tempFile(getClass(), "default-output.avro");
+    output.deleteOnExit();
+
+    List<String> args = asList(
+      dir.getAbsolutePath(),
+      output.getAbsolutePath());
+    int returnCode = new ConcatTool().run(
+      System.in,
+      System.out,
+      System.err,
+      args);
+    assertEquals(0, returnCode);
+
+    assertEquals(ROWS_IN_INPUT_FILES * 3, numRowsInFile(output));
+  }
+
+  @Test
+  public void testGlobPatternConcat() throws Exception {
+    Map<String, String> metadata = new HashMap<String, String>();
+
+    File dir = AvroTestUtil.tempDirectory(getClass(), "input");
+
+    for (int i = 0; i < 3; i++) {
+      String filename = "input" + i + ".avro";
+      File input = generateData(filename, Type.STRING, metadata, DEFLATE);
+      boolean ok = input.renameTo(new File(dir, input.getName()));
+      assertTrue(ok);
+    }
+
+    File output = AvroTestUtil.tempFile(getClass(), "default-output.avro");
+    output.deleteOnExit();
+
+    List<String> args = asList(
+      new File(dir, "/*").getAbsolutePath(),
+      output.getAbsolutePath());
+    int returnCode = new ConcatTool().run(
+      System.in,
+      System.out,
+      System.err,
+      args);
+    assertEquals(0, returnCode);
+
+    assertEquals(ROWS_IN_INPUT_FILES * 3, numRowsInFile(output));
+  }
+
+  @Test(expected = FileNotFoundException.class)
+  public void testFileDoesNotExist() throws Exception {
+    Map<String, String> metadata = new HashMap<String, String>();
+
+    File dir = AvroTestUtil.tempDirectory(getClass(), "input");
+
+    File output = AvroTestUtil.tempFile(getClass(), "default-output.avro");
+    output.deleteOnExit();
+
+    List<String> args = asList(
+      new File(dir, "/doNotExist").getAbsolutePath(),
+      output.getAbsolutePath());
+    new ConcatTool().run(
+      System.in,
+      System.out,
+      System.err,
+      args);
+  }
+
+  @Test
   public void testConcat() throws Exception {
     Map<String, String> metadata = new HashMap<String, String>();
     metadata.put("myMetaKey", "myMetaValue");


Mime
View raw message