Return-Path: X-Original-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7D1E11157B for ; Mon, 21 Jul 2014 21:24:40 +0000 (UTC) Received: (qmail 77390 invoked by uid 500); 21 Jul 2014 21:24:40 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 77313 invoked by uid 500); 21 Jul 2014 21:24:40 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 77302 invoked by uid 99); 21 Jul 2014 21:24:40 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 21 Jul 2014 21:24:40 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 21 Jul 2014 21:24:41 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id B6DE42388868; Mon, 21 Jul 2014 21:24:15 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1612400 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/ hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/jav... Date: Mon, 21 Jul 2014 21:24:15 -0000 To: mapreduce-commits@hadoop.apache.org From: jlowe@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140721212415.B6DE42388868@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jlowe Date: Mon Jul 21 21:24:15 2014 New Revision: 1612400 URL: http://svn.apache.org/r1612400 Log: MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories in its results. Contributed by Jason Dere Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1612400&r1=1612399&r2=1612400&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Mon Jul 21 21:24:15 2014 @@ -172,6 +172,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-5957. AM throws ClassNotFoundException with job classloader enabled if custom output format/committer is used (Sangjin Lee via jlowe) + MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories + in its results (Jason Dere via jlowe) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java Mon Jul 21 21:24:15 2014 @@ -579,7 +579,7 @@ public abstract class CombineFileInputFo blocks = new OneBlockInfo[0]; } else { - if(locations.length == 0) { + if(locations.length == 0 && !stat.isDirectory()) { locations = new BlockLocation[] { new BlockLocation() }; } Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Mon Jul 21 21:24:15 2014 @@ -1275,6 +1275,61 @@ public class TestCombineFileInputFormat } /** + * Test that directories do not get included as part of getSplits() + */ + @Test + public void testGetSplitsWithDirectory() throws Exception { + MiniDFSCluster dfs = null; + try { + Configuration conf = new Configuration(); + dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1) + .build(); + dfs.waitActive(); + + dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1) + .build(); + dfs.waitActive(); + + FileSystem fileSys = dfs.getFileSystem(); + + // Set up the following directory structure: + // /dir1/: directory + // /dir1/file: regular file + // /dir1/dir2/: directory + Path dir1 = new Path("/dir1"); + Path file = new Path("/dir1/file1"); + Path dir2 = new Path("/dir1/dir2"); + if (!fileSys.mkdirs(dir1)) { + throw new IOException("Mkdirs failed to create " + dir1.toString()); + } + FSDataOutputStream out = fileSys.create(file); + out.write(new byte[0]); + out.close(); + if (!fileSys.mkdirs(dir2)) { + throw new IOException("Mkdirs failed to create " + dir2.toString()); + } + + // split it using a CombinedFile input format + DummyInputFormat inFormat = new DummyInputFormat(); + Job job = Job.getInstance(conf); + FileInputFormat.setInputPaths(job, "/dir1"); + List splits = inFormat.getSplits(job); + + // directories should be omitted from getSplits() - we should only see file1 and not dir2 + assertEquals(1, splits.size()); + CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0); + assertEquals(1, fileSplit.getNumPaths()); + assertEquals(file.getName(), fileSplit.getPath(0).getName()); + assertEquals(0, fileSplit.getOffset(0)); + assertEquals(0, fileSplit.getLength(0)); + } finally { + if (dfs != null) { + dfs.shutdown(); + } + } + } + + /** * Test when input files are from non-default file systems */ @Test