From mapreduce-commits-return-271-apmail-hadoop-mapreduce-commits-archive=hadoop.apache.org@hadoop.apache.org Tue Sep 15 20:57:36 2009 Return-Path: Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: (qmail 59323 invoked from network); 15 Sep 2009 20:57:36 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 15 Sep 2009 20:57:36 -0000 Received: (qmail 58934 invoked by uid 500); 15 Sep 2009 20:57:36 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 58907 invoked by uid 500); 15 Sep 2009 20:57:36 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 58896 invoked by uid 99); 15 Sep 2009 20:57:36 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 15 Sep 2009 20:57:36 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 15 Sep 2009 20:57:32 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 6D528238897D; Tue, 15 Sep 2009 20:57:11 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r815483 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java src/tools/org/apache/hadoop/tools/DistCp.java Date: Tue, 15 Sep 2009 20:57:11 -0000 To: mapreduce-commits@hadoop.apache.org From: szetszwo@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090915205711.6D528238897D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: szetszwo Date: Tue Sep 15 20:57:10 2009 New Revision: 815483 URL: http://svn.apache.org/viewvc?rev=815483&view=rev Log: MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all src paths are directories, and (2) it does not skip copying when updating a single file. Contributed by Ravi Gummadi Modified: hadoop/mapreduce/trunk/CHANGES.txt hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Modified: hadoop/mapreduce/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=815483&r1=815482&r2=815483&view=diff ============================================================================== --- hadoop/mapreduce/trunk/CHANGES.txt (original) +++ hadoop/mapreduce/trunk/CHANGES.txt Tue Sep 15 20:57:10 2009 @@ -596,3 +596,7 @@ MAPREDUCE-112. Add counters for reduce input, output records to the new API. (Jothi Padmanabhan via cdouglas) + + MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all + src paths are directories, and (2) it does not skip copying when updating + a single file. (Ravi Gummadi via szetszwo) Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java?rev=815483&r1=815482&r2=815483&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java (original) +++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java Tue Sep 15 20:57:10 2009 @@ -44,6 +44,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MiniMRCluster; import org.apache.hadoop.security.UnixUserGroupInformation; import org.apache.hadoop.security.UserGroupInformation; @@ -495,6 +496,16 @@ "file:///"+TEST_ROOT_DIR+"/dest2/"+fname}); assertTrue("Source and destination directories do not match.", checkFiles(fs, TEST_ROOT_DIR+"/dest2", files)); + + // single file update should skip copy if destination has the file already + String[] args = {"-update", "file:///"+TEST_ROOT_DIR+"/srcdat/"+fname, + "file:///"+TEST_ROOT_DIR+"/dest2/"+fname}; + Configuration conf = new Configuration(); + JobConf job = new JobConf(conf, DistCp.class); + DistCp.Arguments distcpArgs = DistCp.Arguments.valueOf(args, conf); + assertFalse("Single file update failed to skip copying even though the " + + "file exists at destination.", DistCp.setup(conf, job, distcpArgs)); + //copy single file to existing dir deldir(fs, TEST_ROOT_DIR+"/dest2"); fs.mkdirs(new Path(TEST_ROOT_DIR+"/dest2")); Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=815483&r1=815482&r2=815483&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original) +++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Tue Sep 15 20:57:10 2009 @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -380,6 +381,16 @@ int totfiles = job.getInt(SRC_COUNT_LABEL, -1); assert totfiles >= 0 : "Invalid file count " + totfiles; + if (totfiles == 1) { + // Copying a single file; use dst path provided by user as + // destination file rather than destination directory + Path dstparent = absdst.getParent(); + if (!(destFileSys.exists(dstparent) && + destFileSys.getFileStatus(dstparent).isDir())) { + absdst = dstparent; + } + } + // if a directory, ensure created even if empty if (srcstat.isDir()) { if (destFileSys.exists(absdst)) { @@ -438,15 +449,6 @@ + " from " + srcstat.getPath()); } else { - if (totfiles == 1) { - // Copying a single file; use dst path provided by user as destination - // rather than destination directory, if a file - Path dstparent = absdst.getParent(); - if (!(destFileSys.exists(dstparent) && - destFileSys.getFileStatus(dstparent).isDir())) { - absdst = dstparent; - } - } if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) { throw new IOException(absdst + " is a directory"); @@ -736,7 +738,7 @@ } } - static private class Arguments { + static class Arguments { final List srcs; final Path basedir; final Path dst; @@ -1023,13 +1025,32 @@ } /** + * Does the dir already exist at destination ? + * @return true if the dir already exists at destination + */ + private static boolean dirExists(Configuration conf, Path dst) + throws IOException { + FileSystem destFileSys = dst.getFileSystem(conf); + FileStatus status = null; + try { + status = destFileSys.getFileStatus(dst); + }catch (FileNotFoundException e) { + return false; + } + if (!status.isDir()) { + throw new FileAlreadyExistsException("Not a dir: " + dst+" is a file."); + } + return true; + } + + /** * Initialize DFSCopyFileMapper specific job-configuration. * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments * @return true if it is necessary to launch a job. */ - private static boolean setup(Configuration conf, JobConf jobConf, + static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); @@ -1150,9 +1171,12 @@ if (srcfilestat.isDir()) { ++srcCount; - ++dirCount; final String dst = makeRelative(root,src); - src_writer.append(new LongWritable(0), new FilePair(srcfilestat, dst)); + if (!update || !dirExists(conf, new Path(args.dst, dst))) { + ++dirCount; + src_writer.append(new LongWritable(0), + new FilePair(srcfilestat, dst)); + } dst_writer.append(new Text(dst), new Text(src.toString())); } @@ -1161,23 +1185,39 @@ FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for(int i = 0; i < children.length; i++) { - boolean skipfile = false; + boolean skipPath = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); - ++dirCount; + if (!update || !dirExists(conf, new Path(args.dst, dst))) { + ++dirCount; + } + else { + skipPath = true; // skip creating dir at destination + } } else { - //skip file if the src and the dst files are the same. - skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst)); - //skip file if it exceed file limit or size limit - skipfile |= fileCount == args.filelimit + Path destPath = new Path(args.dst, dst); + if (!cur.isDir() && (args.srcs.size() == 1)) { + // Copying a single file; use dst path provided by user as + // destination file rather than destination directory + Path dstparent = destPath.getParent(); + FileSystem destFileSys = destPath.getFileSystem(jobConf); + if (!(destFileSys.exists(dstparent) && + destFileSys.getFileStatus(dstparent).isDir())) { + destPath = dstparent; + } + } + //skip path if the src and the dst files are the same. + skipPath = update && sameFile(srcfs, child, dstfs, destPath); + //skip path if it exceed file limit or size limit + skipPath |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit; - if (!skipfile) { + if (!skipPath) { ++fileCount; byteCount += child.getLen(); @@ -1196,7 +1236,7 @@ } } - if (!skipfile) { + if (!skipPath) { src_writer.append(new LongWritable(child.isDir()? 0: child.getLen()), new FilePair(child, dst)); }