gobblin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hut...@apache.org
Subject incubator-gobblin git commit: [GOBBLIN-214] Fix the filtering issue in listFilesRecursively
Date Fri, 18 Aug 2017 23:42:42 GMT
Repository: incubator-gobblin
Updated Branches:
  refs/heads/master dddc0b3ea -> eae5e6d26


[GOBBLIN-214] Fix the filtering issue in listFilesRecursively

Closes #2067 from yukuai518/pathfilter


Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/eae5e6d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/eae5e6d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/eae5e6d2

Branch: refs/heads/master
Commit: eae5e6d268964a6f3dfa0ae98c3639333f3b0854
Parents: dddc0b3
Author: Kuai Yu <kuyu@linkedin.com>
Authored: Fri Aug 18 16:42:34 2017 -0700
Committer: Hung Tran <hutran@linkedin.com>
Committed: Fri Aug 18 16:42:34 2017 -0700

----------------------------------------------------------------------
 .../org/apache/gobblin/util/FileListUtils.java  |  2 +-
 .../apache/gobblin/util/FileListUtilsTest.java  | 57 ++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/eae5e6d2/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
index 02920c2..51bf66d 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
@@ -132,7 +132,7 @@ public class FileListUtils {
             files.add(status);
           }
         } else {
-          files.add(status);
+          listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories, includeEmptyDirectories);
         }
       }
     } else if (fileFilter.accept(fileStatus.getPath())) {

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/eae5e6d2/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
index 388e311..e739e00 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
@@ -146,6 +146,63 @@ public class FileListUtilsTest {
     }
   }
 
+  public void testListAllFiles () throws IOException {
+    FileSystem localFs = FileSystem.getLocal(new Configuration());
+    Path baseDir = new Path(FILE_UTILS_TEST_DIR, "listAllFiles");
+    System.out.println (baseDir);
+    try {
+      if (localFs.exists(baseDir)) {
+        localFs.delete(baseDir, true);
+      }
+      localFs.mkdirs(baseDir);
+
+      // Empty root directory
+      List<FileStatus> testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 0);
+
+      // With two avro files (1.avro, 2.avro)
+      Path file1 = new Path(baseDir, "1.avro");
+      localFs.create(file1);
+      Path file2 = new Path(baseDir, "2.avro");
+      localFs.create(file2);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 2);
+
+      // With an avro schema file (part.avsc)
+      Path avsc = new Path(baseDir, "part.avsc");
+      localFs.create(avsc);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 3);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, (path)->path.getName().endsWith(".avro"));
+      Assert.assertTrue(testFiles.size() == 2);
+
+      // A complicated hierarchy
+      // baseDir ____ 1.avro
+      //        |____ 2.avro
+      //        |____ part.avsc
+      //        |____ subDir ____ 3.avro
+      //                    |____ subDir2 ____ 4.avro
+      //                                 |____ part2.avsc
+      Path subDir = new Path(baseDir, "subDir");
+      localFs.mkdirs(subDir);
+      Path file3 = new Path(subDir, "3.avro");
+      localFs.create(file3);
+      Path subDir2 = new Path(subDir, "subDir2");
+      localFs.mkdirs(subDir2);
+      Path file4 = new Path(subDir2, "4.avro");
+      localFs.create(file4);
+      Path avsc2 = new Path(subDir2, "part2.avsc");
+      localFs.create(avsc2);
+
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, (path)->path.getName().endsWith(".avro"));
+      Assert.assertTrue(testFiles.size() == 4);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 6);
+    } finally {
+      localFs.delete(baseDir, true);
+    }
+  }
+
   public void testListFilesToCopyAtPath() throws IOException {
     FileSystem localFs = FileSystem.getLocal(new Configuration());
     Path baseDir = new Path(FILE_UTILS_TEST_DIR, "fileListTestDir4");


Mime
View raw message