hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomwh...@apache.org
Subject svn commit: r723332 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/fs/FileSystem.java src/test/org/apache/hadoop/fs/TestGlobPaths.java src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java
Date Thu, 04 Dec 2008 14:11:24 GMT
Author: tomwhite
Date: Thu Dec  4 06:11:23 2008
New Revision: 723332

URL: http://svn.apache.org/viewvc?rev=723332&view=rev
Log:
HADOOP-3497. Fix bug in overly restrictive file globbing with a PathFilter.

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
    hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
    hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Dec  4 06:11:23 2008
@@ -47,6 +47,9 @@
     HADOOP-4035. Support memory based scheduling in capacity scheduler.
     (Vinod Kumar Vavilapalli via yhemanth)
 
+    HADOOP-3497. Fix bug in overly restrictive file globbing with a
+    PathFilter. (tomwhite)
+
   NEW FEATURES
 
     HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java Thu Dec  4 06:11:23 2008
@@ -879,26 +879,16 @@
 
     // glob the paths that match the parent path, i.e., [0, components.length-1]
     boolean[] hasGlob = new boolean[]{false};
-    Path[] parentPaths =
-      globPathsLevel(parents, components, level, filter, hasGlob);
+    Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob);
     FileStatus[] results;
     if (parentPaths == null || parentPaths.length == 0) {
       results = null;
     } else {
       // Now work on the last component of the path
       GlobFilter fp = new GlobFilter(components[components.length - 1], filter);
+      results = listStatus(parentPaths, fp);
       if (fp.hasPattern()) { // last component has a pattern
-        // list parent directories and then glob the results
-        results = listStatus(parentPaths, fp);
         hasGlob[0] = true;
-      } else { // last component does not have a pattern
-        // get all the path names
-        for (int i = 0; i < parentPaths.length; i++) {
-          parentPaths[i] = new Path(parentPaths[i],
-              components[components.length - 1]);
-        }
-        // get all their statuses
-        results = getFileStatus(parentPaths);
       }
     }
 
@@ -924,13 +914,13 @@
    * components [<code>level</code>, <code>N-1</code>].
    */
   private Path[] globPathsLevel(Path[] parents, String[] filePattern,
-      int level, PathFilter filter, boolean[] hasGlob) throws IOException {
+      int level, boolean[] hasGlob) throws IOException {
     if (level == filePattern.length - 1)
       return parents;
     if (parents == null || parents.length == 0) {
       return null;
     }
-    GlobFilter fp = new GlobFilter(filePattern[level], filter);
+    GlobFilter fp = new GlobFilter(filePattern[level]);
     if (fp.hasPattern()) {
       parents = FileUtil.stat2Paths(listStatus(parents, fp));
       hasGlob[0] = true;
@@ -939,7 +929,7 @@
         parents[i] = new Path(parents[i], filePattern[level]);
       }
     }
-    return globPathsLevel(parents, filePattern, level + 1, filter, hasGlob);
+    return globPathsLevel(parents, filePattern, level + 1, hasGlob);
   }
 
   /* A class that could decide if a string matches the glob or not */

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Thu Dec  4 06:11:23 2008
@@ -18,6 +18,7 @@
 package org.apache.hadoop.fs;
 
 import java.io.IOException;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -26,6 +27,19 @@
 
 public class TestGlobPaths extends TestCase {
   
+  static class RegexPathFilter implements PathFilter {
+    
+    private final String regex;
+    public RegexPathFilter(String regex) {
+      this.regex = regex;
+    }
+
+    public boolean accept(Path path) {
+      return path.toString().matches(regex);
+    }
+
+  }
+  
   static private MiniDFSCluster dfsCluster;
   static private FileSystem fs;
   static final private int NUM_OF_PATHS = 4;
@@ -48,6 +62,31 @@
     }
   }
   
+  public void testPathFilter() throws IOException {
+    try {
+      String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b" };
+      Path[] matchedPath = prepareTesting(USER_DIR + "/*/*", files,
+          new RegexPathFilter("^.*" + Pattern.quote(USER_DIR) + "/a/b"));
+      assertEquals(matchedPath.length, 1);
+      assertEquals(matchedPath[0], path[1]);
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  public void testPathFilterWithFixedLastComponent() throws IOException {
+    try {
+      String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b",
+                                      USER_DIR + "/c", USER_DIR + "/c/b", };
+      Path[] matchedPath = prepareTesting(USER_DIR + "/*/b", files,
+          new RegexPathFilter("^.*" + Pattern.quote(USER_DIR) + "/a/b"));
+      assertEquals(matchedPath.length, 1);
+      assertEquals(matchedPath[0], path[1]);
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
   public void testGlob() throws Exception {
     //pTestEscape(); // need to wait until HADOOP-1995 is fixed
     pTestJavaRegexSpecialChars();
@@ -368,6 +407,23 @@
     return globResults;
   }
   
+  private Path[] prepareTesting(String pattern, String[] files,
+      PathFilter filter) throws IOException {
+    for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
+      path[i] = new Path(files[i]).makeQualified(fs);
+      if (!fs.mkdirs(path[i])) {
+        throw new IOException("Mkdirs failed to create " + path[i].toString());
+      }
+    }
+    Path patternPath = new Path(pattern);
+    Path[] globResults = FileUtil.stat2Paths(fs.globStatus(patternPath, filter),
+                                             patternPath);
+    for(int i=0; i<globResults.length; i++) {
+      globResults[i] = globResults[i].makeQualified(fs);
+    }
+    return globResults;
+  }
+  
   private void cleanupDFS() throws IOException {
     fs.delete(new Path("/user"), true);
   }

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java Thu Dec  4 06:11:23 2008
@@ -90,7 +90,8 @@
   public static class TestPathFilter implements PathFilter {
 
     public boolean accept(Path path) {
-      return path.getName().length() == 1;
+      String name = path.getName();
+      return name.equals("TestFileInputFormatPathFilter") || name.length() == 1;
     }
   }
 



Mime
View raw message