Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 93772 invoked from network); 28 Mar 2008 12:45:39 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 28 Mar 2008 12:45:39 -0000 Received: (qmail 17399 invoked by uid 500); 28 Mar 2008 12:45:38 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 17367 invoked by uid 500); 28 Mar 2008 12:45:38 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 17358 invoked by uid 99); 28 Mar 2008 12:45:38 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 28 Mar 2008 05:45:38 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 28 Mar 2008 12:45:07 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id BFF7A1A9832; Fri, 28 Mar 2008 05:45:18 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r642211 - in /hadoop/core/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/FileInputFormat.java Date: Fri, 28 Mar 2008 12:45:17 -0000 To: core-commits@hadoop.apache.org From: ddas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080328124518.BFF7A1A9832@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: ddas Date: Fri Mar 28 05:45:15 2008 New Revision: 642211 URL: http://svn.apache.org/viewvc?rev=642211&view=rev Log: HADOOP-2055. Allows users to set PathFilter on the FileInputFormat. Contributed by Alejandro Abdelnur. 
Modified: hadoop/core/trunk/CHANGES.txt hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java Modified: hadoop/core/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=642211&r1=642210&r2=642211&view=diff ============================================================================== --- hadoop/core/trunk/CHANGES.txt (original) +++ hadoop/core/trunk/CHANGES.txt Fri Mar 28 05:45:15 2008 @@ -84,6 +84,9 @@ HADOOP-1622. Allow multiple jar files for map reduce. (Mahadev Konar via dhruba) + HADOOP-2055. Allows users to set PathFilter on the FileInputFormat. + (Alejandro Abdelnur via ddas) + IMPROVEMENTS HADOOP-2655. Copy on write for data and metadata files in the Modified: hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java?rev=642211&r1=642210&r2=642211&view=diff ============================================================================== --- hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java (original) +++ hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java Fri Mar 28 05:45:15 2008 @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.util.ReflectionUtils; /** * A base class for file-based {@link InputFormat}. @@ -59,6 +60,28 @@ } /** + * Proxy PathFilter that accepts a path only if all filters given in the + * constructor do. Used by the listPaths() to apply the built-in + * hiddenFileFilter together with a user provided one (if any). 
+ */ + private static class MultiPathFilter implements PathFilter { + private List<PathFilter> filters; + + public MultiPathFilter(List<PathFilter> filters) { + this.filters = filters; + } + + public boolean accept(Path path) { + for (PathFilter filter : filters) { + if (!filter.accept(path)) { + return false; + } + } + return true; + } + } + + /** * Is the given filename splitable? Usually, true, but if the file is * stream compressed, it will not be. * @@ -79,6 +102,28 @@ Reporter reporter) throws IOException; + /** + * Set a PathFilter to be applied to the input paths for the map-reduce job. + * + * @param filter the PathFilter class to use for filtering the input paths. + */ + public static void setInputPathFilter(JobConf conf, + Class<? extends PathFilter> filter) { + conf.setClass("mapred.input.pathFilter.class", filter, PathFilter.class); + } + + /** + * Get a PathFilter instance of the filter set for the input paths. + * + * @return the PathFilter instance set for the job, NULL if none has been set. + */ + public static PathFilter getInputPathFilter(JobConf conf) { + Class filterClass = conf.getClass("mapred.input.pathFilter.class", null, + PathFilter.class); + return (filterClass != null) ? + (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null; + } + /** List input directories. * Subclasses may override to, e.g., select only files matching a regular * expression. @@ -93,11 +138,23 @@ if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } - List<Path> result = new ArrayList<Path>(); + + List<Path> result = new ArrayList<Path>(); + + // creates a MultiPathFilter with the hiddenFileFilter and the + // user provided one (if any). 
+ List<PathFilter> filters = new ArrayList<PathFilter>(); + filters.add(hiddenFileFilter); + PathFilter jobFilter = getInputPathFilter(job); + if (jobFilter != null) { + filters.add(jobFilter); + } + PathFilter inputFilter = new MultiPathFilter(filters); + for (Path p: dirs) { FileSystem fs = p.getFileSystem(job); Path[] matches = - fs.listPaths(fs.globPaths(p, hiddenFileFilter), hiddenFileFilter); + fs.listPaths(fs.globPaths(p, inputFilter), inputFilter); for (Path match: matches) { result.add(fs.makeQualified(match)); }