hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From szets...@apache.org
Subject svn commit: r815483 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java src/tools/org/apache/hadoop/tools/DistCp.java
Date Tue, 15 Sep 2009 20:57:11 GMT
Author: szetszwo
Date: Tue Sep 15 20:57:10 2009
New Revision: 815483

URL: http://svn.apache.org/viewvc?rev=815483&view=rev
Log:
MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all src paths are directories,
and (2) it does not skip copying when updating a single file.  Contributed by Ravi Gummadi

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=815483&r1=815482&r2=815483&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Sep 15 20:57:10 2009
@@ -596,3 +596,7 @@
 
     MAPREDUCE-112. Add counters for reduce input, output records to the new API.
     (Jothi Padmanabhan via cdouglas)
+
+    MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all
+    src paths are directories, and (2) it does not skip copying when updating
+    a single file.  (Ravi Gummadi via szetszwo)

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java?rev=815483&r1=815482&r2=815483&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java Tue
Sep 15 20:57:10 2009
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MiniMRCluster;
 import org.apache.hadoop.security.UnixUserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -495,6 +496,16 @@
                         "file:///"+TEST_ROOT_DIR+"/dest2/"+fname});
       assertTrue("Source and destination directories do not match.",
           checkFiles(fs, TEST_ROOT_DIR+"/dest2", files));     
+      
+      // single file update should skip copy if destination has the file already
+      String[] args = {"-update", "file:///"+TEST_ROOT_DIR+"/srcdat/"+fname,
+          "file:///"+TEST_ROOT_DIR+"/dest2/"+fname};
+      Configuration conf = new Configuration();
+      JobConf job = new JobConf(conf, DistCp.class);
+      DistCp.Arguments distcpArgs = DistCp.Arguments.valueOf(args, conf);
+      assertFalse("Single file update failed to skip copying even though the " 
+          + "file exists at destination.", DistCp.setup(conf, job, distcpArgs));
+      
       //copy single file to existing dir
       deldir(fs, TEST_ROOT_DIR+"/dest2");
       fs.mkdirs(new Path(TEST_ROOT_DIR+"/dest2"));

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=815483&r1=815482&r2=815483&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Tue Sep 15 20:57:10
2009
@@ -40,6 +40,7 @@
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -380,6 +381,16 @@
       int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
       assert totfiles >= 0 : "Invalid file count " + totfiles;
 
+      if (totfiles == 1) {
+        // Copying a single file; use dst path provided by user as
+        // destination file rather than destination directory
+        Path dstparent = absdst.getParent();
+        if (!(destFileSys.exists(dstparent) &&
+              destFileSys.getFileStatus(dstparent).isDir())) {
+          absdst = dstparent;
+        }
+      }
+      
       // if a directory, ensure created even if empty
       if (srcstat.isDir()) {
         if (destFileSys.exists(absdst)) {
@@ -438,15 +449,6 @@
             + " from " + srcstat.getPath());        
       }
       else {
-        if (totfiles == 1) {
-          // Copying a single file; use dst path provided by user as destination
-          // rather than destination directory, if a file
-          Path dstparent = absdst.getParent();
-          if (!(destFileSys.exists(dstparent) &&
-                destFileSys.getFileStatus(dstparent).isDir())) {
-            absdst = dstparent;
-          }
-        }
         if (destFileSys.exists(absdst) &&
             destFileSys.getFileStatus(absdst).isDir()) {
           throw new IOException(absdst + " is a directory");
@@ -736,7 +738,7 @@
     }
   }
 
-  static private class Arguments {
+  static class Arguments {
     final List<Path> srcs;
     final Path basedir;
     final Path dst;
@@ -1023,13 +1025,32 @@
   }
   
   /**
+   * Does the dir already exist at destination ?
+   * @return true   if the dir already exists at destination
+   */
+  private static boolean dirExists(Configuration conf, Path dst)
+                 throws IOException {
+    FileSystem destFileSys = dst.getFileSystem(conf);
+    FileStatus status = null;
+    try {
+      status = destFileSys.getFileStatus(dst);
+    }catch (FileNotFoundException e) {
+      return false;
+    }
+    if (!status.isDir()) {
+      throw new FileAlreadyExistsException("Not a dir: " + dst+" is a file.");
+    }
+    return true;
+  }
+  
+  /**
    * Initialize DFSCopyFileMapper specific job-configuration.
    * @param conf : The dfs/mapred configuration.
    * @param jobConf : The handle to the jobConf object to be initialized.
    * @param args Arguments
    * @return true if it is necessary to launch a job.
    */
-  private static boolean setup(Configuration conf, JobConf jobConf,
+  static boolean setup(Configuration conf, JobConf jobConf,
                             final Arguments args)
       throws IOException {
     jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
@@ -1150,9 +1171,12 @@
         
         if (srcfilestat.isDir()) {
           ++srcCount;
-          ++dirCount;
           final String dst = makeRelative(root,src);
-          src_writer.append(new LongWritable(0), new FilePair(srcfilestat, dst));
+          if (!update || !dirExists(conf, new Path(args.dst, dst))) {
+            ++dirCount;
+            src_writer.append(new LongWritable(0),
+                              new FilePair(srcfilestat, dst));
+          }
           dst_writer.append(new Text(dst), new Text(src.toString()));
         }
 
@@ -1161,23 +1185,39 @@
           FileStatus cur = pathstack.pop();
           FileStatus[] children = srcfs.listStatus(cur.getPath());
           for(int i = 0; i < children.length; i++) {
-            boolean skipfile = false;
+            boolean skipPath = false;
             final FileStatus child = children[i]; 
             final String dst = makeRelative(root, child.getPath());
             ++srcCount;
 
             if (child.isDir()) {
               pathstack.push(child);
-              ++dirCount;
+              if (!update || !dirExists(conf, new Path(args.dst, dst))) {
+                ++dirCount;
+              }
+              else {
+                skipPath = true; // skip creating dir at destination
+              }
             }
             else {
-              //skip file if the src and the dst files are the same.
-              skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst,
dst));
-              //skip file if it exceed file limit or size limit
-              skipfile |= fileCount == args.filelimit
+              Path destPath = new Path(args.dst, dst);
+              if (!cur.isDir() && (args.srcs.size() == 1)) {
+                // Copying a single file; use dst path provided by user as
+                // destination file rather than destination directory
+                Path dstparent = destPath.getParent();
+                FileSystem destFileSys = destPath.getFileSystem(jobConf);
+                if (!(destFileSys.exists(dstparent) &&
+                    destFileSys.getFileStatus(dstparent).isDir())) {
+                  destPath = dstparent;
+                }
+              }
+              //skip path if the src and the dst files are the same.
+              skipPath = update && sameFile(srcfs, child, dstfs, destPath);
+              //skip path if it exceed file limit or size limit
+              skipPath |= fileCount == args.filelimit
                           || byteCount + child.getLen() > args.sizelimit; 
 
-              if (!skipfile) {
+              if (!skipPath) {
                 ++fileCount;
                 byteCount += child.getLen();
 
@@ -1196,7 +1236,7 @@
               }
             }
 
-            if (!skipfile) {
+            if (!skipPath) {
               src_writer.append(new LongWritable(child.isDir()? 0: child.getLen()),
                   new FilePair(child, dst));
             }



Mime
View raw message