hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject svn commit: r951207 - in /hadoop/common/trunk: ./ src/java/org/apache/hadoop/fs/ src/test/core/org/apache/hadoop/fs/
Date Thu, 03 Jun 2010 23:39:33 GMT
Author: eli
Date: Thu Jun  3 23:39:33 2010
New Revision: 951207

URL: http://svn.apache.org/viewvc?rev=951207&view=rev
Log:
HADOOP-6787. Factor out glob pattern code from FileContext and FileSystem.
Contributed by Luke Lu.

Added:
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobFilter.java
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobPattern.java
    hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestGlobPattern.java
Modified:
    hadoop/common/trunk/CHANGES.txt
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java

Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=951207&r1=951206&r2=951207&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Thu Jun  3 23:39:33 2010
@@ -57,6 +57,11 @@ Trunk (unreleased changes)
 
     HADOOP-6747. TestNetUtils fails on Mac OS X. (Todd Lipcon via jghoman)
 
+    HADOOP-6787. Factor out glob pattern code from FileContext and
+    Filesystem. Also fix bugs identified in HADOOP-6618 and make the
+    glob pattern code less restrictive and more POSIX standard
+    compliant. (Luke Lu via eli)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java?rev=951207&r1=951206&r2=951207&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java Thu Jun  3 23:39:33 2010
@@ -1874,131 +1874,6 @@ public final class FileContext {
     }
   }
 
-  /* A class that could decide if a string matches the glob or not */
-  private static class GlobFilter implements PathFilter {
-    private PathFilter userFilter = DEFAULT_FILTER;
-    private Pattern regex;
-    private boolean hasPattern = false;
-      
-    /** Default pattern character: Escape any special meaning. */
-    private static final char  PAT_ESCAPE = '\\';
-    /** Default pattern character: Any single character. */
-    private static final char  PAT_ANY = '.';
-    /** Default pattern character: Character set close. */
-    private static final char  PAT_SET_CLOSE = ']';
-      
-    GlobFilter(final String filePattern) {
-      setRegex(filePattern);
-    }
-      
-    GlobFilter(final String filePattern, final PathFilter filter) {
-      userFilter = filter;
-      setRegex(filePattern);
-    }
-      
-    private boolean isJavaRegexSpecialChar(char pChar) {
-      return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' ||
-             pChar == '|' || pChar == '+';
-    }
-    
-    void setRegex(String filePattern) {
-      int len;
-      int setOpen;
-      int curlyOpen;
-      boolean setRange;
-
-      StringBuilder fileRegex = new StringBuilder();
-
-      // Validate the pattern
-      len = filePattern.length();
-      if (len == 0) {
-        return;
-      }
-
-      setOpen = 0;
-      setRange = false;
-      curlyOpen = 0;
-
-      for (int i = 0; i < len; i++) {
-        char pCh;
-          
-        // Examine a single pattern character
-        pCh = filePattern.charAt(i);
-        if (pCh == PAT_ESCAPE) {
-          fileRegex.append(pCh);
-          i++;
-          if (i >= len) {
-            error("An escaped character does not present", filePattern, i);
-          }
-          pCh = filePattern.charAt(i);
-        } else if (isJavaRegexSpecialChar(pCh)) {
-          fileRegex.append(PAT_ESCAPE);
-        } else if (pCh == '*') {
-          fileRegex.append(PAT_ANY);
-          hasPattern = true;
-        } else if (pCh == '?') {
-          pCh = PAT_ANY;
-          hasPattern = true;
-        } else if (pCh == '{') {
-          fileRegex.append('(');
-          pCh = '(';
-          curlyOpen++;
-          hasPattern = true;
-        } else if (pCh == ',' && curlyOpen > 0) {
-          fileRegex.append(")|");
-          pCh = '(';
-        } else if (pCh == '}' && curlyOpen > 0) {
-          // End of a group
-          curlyOpen--;
-          fileRegex.append(")");
-          pCh = ')';
-        } else if (pCh == '[' && setOpen == 0) {
-          setOpen++;
-          hasPattern = true;
-        } else if (pCh == '^' && setOpen > 0) {
-        } else if (pCh == '-' && setOpen > 0) {
-          // Character set range
-          setRange = true;
-        } else if (pCh == PAT_SET_CLOSE && setRange) {
-          // Incomplete character set range
-          error("Incomplete character set range", filePattern, i);
-        } else if (pCh == PAT_SET_CLOSE && setOpen > 0) {
-          // End of a character set
-          if (setOpen < 2) {
-            error("Unexpected end of set", filePattern, i);
-          }
-          setOpen = 0;
-        } else if (setOpen > 0) {
-          // Normal character, or the end of a character set range
-          setOpen++;
-          setRange = false;
-        }
-        fileRegex.append(pCh);
-      }
-        
-      // Check for a well-formed pattern
-      if (setOpen > 0 || setRange || curlyOpen > 0) {
-        // Incomplete character set or character range
-        error("Expecting set closure character or end of range, or }", 
-            filePattern, len);
-      }
-      regex = Pattern.compile(fileRegex.toString());
-    }
-      
-    boolean hasPattern() {
-      return hasPattern;
-    }
-      
-    public boolean accept(final Path path) {
-      return regex.matcher(path.getName()).matches() && userFilter.accept(path);
-    }
-      
-    private void error(final String s, final String pattern, final int pos) {
-      throw new HadoopIllegalArgumentException("Illegal file pattern: " + s
-          + " for glob " + pattern + " at " + pos);
-    }
-  }
-
   /**
    * Check if copying srcName to dst would overwrite an existing 
    * file or directory.

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=951207&r1=951206&r2=951207&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Thu Jun  3 23:39:33 2010
@@ -1369,127 +1369,6 @@ public abstract class FileSystem extends
     return globPathsLevel(parents, filePattern, level + 1, hasGlob);
   }
 
-  /* A class that could decide if a string matches the glob or not */
-  private static class GlobFilter implements PathFilter {
-    private PathFilter userFilter = DEFAULT_FILTER;
-    private Pattern regex;
-    private boolean hasPattern = false;
-      
-    /** Default pattern character: Escape any special meaning. */
-    private static final char  PAT_ESCAPE = '\\';
-    /** Default pattern character: Any single character. */
-    private static final char  PAT_ANY = '.';
-    /** Default pattern character: Character set close. */
-    private static final char  PAT_SET_CLOSE = ']';
-      
-    GlobFilter(String filePattern) throws IOException {
-      setRegex(filePattern);
-    }
-      
-    GlobFilter(String filePattern, PathFilter filter) throws IOException {
-      userFilter = filter;
-      setRegex(filePattern);
-    }
-      
-    private boolean isJavaRegexSpecialChar(char pChar) {
-      return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' ||
-             pChar == '|' || pChar == '+';
-    }
-    void setRegex(String filePattern) throws IOException {
-      int len;
-      int setOpen;
-      int curlyOpen;
-      boolean setRange;
-
-      StringBuilder fileRegex = new StringBuilder();
-
-      // Validate the pattern
-      len = filePattern.length();
-      if (len == 0)
-        return;
-
-      setOpen = 0;
-      setRange = false;
-      curlyOpen = 0;
-
-      for (int i = 0; i < len; i++) {
-        char pCh;
-          
-        // Examine a single pattern character
-        pCh = filePattern.charAt(i);
-        if (pCh == PAT_ESCAPE) {
-          fileRegex.append(pCh);
-          i++;
-          if (i >= len)
-            error("An escaped character does not present", filePattern, i);
-          pCh = filePattern.charAt(i);
-        } else if (isJavaRegexSpecialChar(pCh)) {
-          fileRegex.append(PAT_ESCAPE);
-        } else if (pCh == '*') {
-          fileRegex.append(PAT_ANY);
-          hasPattern = true;
-        } else if (pCh == '?') {
-          pCh = PAT_ANY;
-          hasPattern = true;
-        } else if (pCh == '{') {
-          fileRegex.append('(');
-          pCh = '(';
-          curlyOpen++;
-          hasPattern = true;
-        } else if (pCh == ',' && curlyOpen > 0) {
-          fileRegex.append(")|");
-          pCh = '(';
-        } else if (pCh == '}' && curlyOpen > 0) {
-          // End of a group
-          curlyOpen--;
-          fileRegex.append(")");
-          pCh = ')';
-        } else if (pCh == '[' && setOpen == 0) {
-          setOpen++;
-          hasPattern = true;
-        } else if (pCh == '^' && setOpen > 0) {
-        } else if (pCh == '-' && setOpen > 0) {
-          // Character set range
-          setRange = true;
-        } else if (pCh == PAT_SET_CLOSE && setRange) {
-          // Incomplete character set range
-          error("Incomplete character set range", filePattern, i);
-        } else if (pCh == PAT_SET_CLOSE && setOpen > 0) {
-          // End of a character set
-          if (setOpen < 2)
-            error("Unexpected end of set", filePattern, i);
-          setOpen = 0;
-        } else if (setOpen > 0) {
-          // Normal character, or the end of a character set range
-          setOpen++;
-          setRange = false;
-        }
-        fileRegex.append(pCh);
-      }
-        
-      // Check for a well-formed pattern
-      if (setOpen > 0 || setRange || curlyOpen > 0) {
-        // Incomplete character set or character range
-        error("Expecting set closure character or end of range, or }", 
-            filePattern, len);
-      }
-      regex = Pattern.compile(fileRegex.toString());
-    }
-      
-    boolean hasPattern() {
-      return hasPattern;
-    }
-      
-    public boolean accept(Path path) {
-      return regex.matcher(path.getName()).matches() && userFilter.accept(path);
-    }
-      
-    private void error(String s, String pattern, int pos) throws IOException {
-      throw new IOException("Illegal file pattern: "
-                            +s+ " for glob "+ pattern + " at " + pos);
-    }
-  }
-    
   /** Return the current user's home directory in this filesystem.
    * The default implementation returns "/user/$USER/".
    */

Added: hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobFilter.java?rev=951207&view=auto
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobFilter.java (added)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobFilter.java Thu Jun  3 23:39:33 2010
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.PatternSyntaxException;
+import java.io.IOException;
+
+ // A class that could decide if a string matches the glob or not
+class GlobFilter implements PathFilter {
+  private final static PathFilter DEFAULT_FILTER = new PathFilter() {
+      public boolean accept(Path file) {
+        return true;
+      }
+    };
+
+  private PathFilter userFilter = DEFAULT_FILTER;
+  private GlobPattern pattern;
+
+  GlobFilter(String filePattern) throws IOException {
+    init(filePattern, DEFAULT_FILTER);
+  }
+
+  GlobFilter(String filePattern, PathFilter filter) throws IOException {
+    init(filePattern, filter);
+  }
+
+  void init(String filePattern, PathFilter filter) throws IOException {
+    try {
+      userFilter = filter;
+      pattern = new GlobPattern(filePattern);
+    }
+    catch (PatternSyntaxException e) {
+      // Existing code expects IOException startWith("Illegal file pattern")
+      throw new IOException("Illegal file pattern: "+ e.getMessage(), e);
+    }
+  }
+
+  boolean hasPattern() {
+    return pattern.hasWildcard();
+  }
+
+  public boolean accept(Path path) {
+    return pattern.matches(path.getName()) && userFilter.accept(path);
+  }
+}

Added: hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobPattern.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobPattern.java?rev=951207&view=auto
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobPattern.java (added)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/GlobPattern.java Thu Jun  3 23:39:33 2010
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * A class for POSIX glob pattern with brace expansions.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class GlobPattern {
+  private static final char BACKSLASH = '\\';
+  private Pattern compiled;
+  private boolean hasWildcard = false;
+
+  /**
+   * Construct the glob pattern object with a glob pattern string
+   * @param globPattern the glob pattern string
+   */
+  public GlobPattern(String globPattern) {
+    set(globPattern);
+  }
+
+  /**
+   * @return the compiled pattern
+   */
+  public Pattern compiled() {
+    return compiled;
+  }
+
+  /**
+   * Compile glob pattern string
+   * @param globPattern the glob pattern
+   * @return the pattern object
+   */
+  public static Pattern compile(String globPattern) {
+    return new GlobPattern(globPattern).compiled();
+  }
+
+  /**
+   * Match input against the compiled glob pattern
+   * @param s input chars
+   * @return true for successful matches
+   */
+  public boolean matches(CharSequence s) {
+    return compiled.matcher(s).matches();
+  }
+
+  /**
+   * Set and compile a glob pattern
+   * @param glob  the glob pattern string
+   */
+  public void set(String glob) {
+    StringBuilder regex = new StringBuilder();
+    int setOpen = 0;
+    int curlyOpen = 0;
+    int len = glob.length();
+    hasWildcard = false;
+
+    for (int i = 0; i < len; i++) {
+      char c = glob.charAt(i);
+
+      switch (c) {
+        case BACKSLASH:
+          if (++i >= len) {
+            error("Missing escaped character", glob, i);
+          }
+          regex.append(c).append(glob.charAt(i));
+          continue;
+        case '.':
+        case '$':
+        case '(':
+        case ')':
+        case '|':
+        case '+':
+          // escape regex special chars that are not glob special chars
+          regex.append(BACKSLASH);
+          break;
+        case '*':
+          regex.append('.');
+          hasWildcard = true;
+          break;
+        case '?':
+          regex.append('.');
+          hasWildcard = true;
+          continue;
+        case '{': // start of a group
+          regex.append("(?:"); // non-capturing
+          curlyOpen++;
+          hasWildcard = true;
+          continue;
+        case ',':
+          regex.append(curlyOpen > 0 ? '|' : c);
+          continue;
+        case '}':
+          if (curlyOpen > 0) {
+            // end of a group
+            curlyOpen--;
+            regex.append(")");
+            continue;
+          }
+          break;
+        case '[':
+          if (setOpen > 0) {
+            error("Unclosed character class", glob, i);
+          }
+          setOpen++;
+          hasWildcard = true;
+          break;
+        case '^': // ^ inside [...] can be unescaped
+          if (setOpen == 0) {
+            regex.append(BACKSLASH);
+          }
+          break;
+        case '!': // [! needs to be translated to [^
+          regex.append(setOpen > 0 && '[' == glob.charAt(i - 1) ? '^' : '!');
+          continue;
+        case ']':
+          // Many set errors like [][] could not be easily detected here,
+          // as []], []-] and [-] are all valid POSIX glob and java regex.
+          // We'll just let the regex compiler do the real work.
+          setOpen = 0;
+          break;
+        default:
+      }
+      regex.append(c);
+    }
+
+    if (setOpen > 0) {
+      error("Unclosed character class", glob, len);
+    }
+    if (curlyOpen > 0) {
+      error("Unclosed group", glob, len);
+    }
+    compiled = Pattern.compile(regex.toString());
+  }
+
+  /**
+   * @return true if this is a wildcard pattern (with special chars)
+   */
+  public boolean hasWildcard() {
+    return hasWildcard;
+  }
+
+  private static void error(String message, String pattern, int pos) {
+    throw new PatternSyntaxException(message, pattern, pos);
+  }
+}

Added: hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestGlobPattern.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestGlobPattern.java?rev=951207&view=auto
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestGlobPattern.java (added)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestGlobPattern.java Thu Jun  3 23:39:33 2010
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.PatternSyntaxException;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for glob patterns
+ */
+public class TestGlobPattern {
+  private void assertMatch(boolean yes, String glob, String...input) {
+    GlobPattern pattern = new GlobPattern(glob);
+
+    for (String s : input) {
+      boolean result = pattern.matches(s);
+      assertTrue(glob +" should"+ (yes ? "" : " not") +" match "+ s,
+                 yes ? result : !result);
+    }
+  }
+
+  private void shouldThrow(String... globs) {
+    for (String glob : globs) {
+      try {
+        GlobPattern.compile(glob);
+      }
+      catch (PatternSyntaxException e) {
+        e.printStackTrace();
+        continue;
+      }
+      assertTrue("glob "+ glob +" should throw", false);
+    }
+  }
+
+  @Test public void testValidPatterns() {
+    assertMatch(true, "*", "^$", "foo", "bar");
+    assertMatch(true, "?", "?", "^", "[", "]", "$");
+    assertMatch(true, "foo*", "foo", "food", "fool");
+    assertMatch(true, "f*d", "fud", "food");
+    assertMatch(true, "*d", "good", "bad");
+    assertMatch(true, "\\*\\?\\[\\{\\\\", "*?[{\\");
+    assertMatch(true, "[]^-]", "]", "-", "^");
+    assertMatch(true, "]", "]");
+    assertMatch(true, "^.$()|+", "^.$()|+");
+    assertMatch(true, "[^^]", ".", "$", "[", "]");
+    assertMatch(false, "[^^]", "^");
+    assertMatch(true, "[!!-]", "^", "?");
+    assertMatch(false, "[!!-]", "!", "-");
+    assertMatch(true, "{[12]*,[45]*,[78]*}", "1", "2!", "4", "42", "7", "7$");
+    assertMatch(false, "{[12]*,[45]*,[78]*}", "3", "6", "9ß");
+    assertMatch(true, "}", "}");
+  }
+
+  @Test public void testInvalidPatterns() {
+    shouldThrow("[", "[[]]", "[][]", "{", "\\");
+  }
+}



Mime
View raw message