hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From szets...@apache.org
Subject svn commit: r885530 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java src/tools/org/apache/hadoop/tools/DistCp.java
Date Mon, 30 Nov 2009 18:38:07 GMT
Author: szetszwo
Date: Mon Nov 30 18:38:06 2009
New Revision: 885530

URL: http://svn.apache.org/viewvc?rev=885530&view=rev
Log:
MAPREDUCE-1231. Added a new DistCp option, -skipcrccheck, so that the CRC check during setup
can be skipped.  Contributed by Jothi Padmanabhan

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=885530&r1=885529&r2=885530&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Mon Nov 30 18:38:06 2009
@@ -51,6 +51,9 @@
     MAPREDUCE-1169. Improvements to mysqldump use in Sqoop.
     (Aaron Kimball via tomwhite)
 
+    MAPREDUCE-1231. Added a new DistCp option, -skipcrccheck, so that the CRC
+    check during setup can be skipped.  (Jothi Padmanabhan via szetszwo)
+
   OPTIMIZATIONS
 
     MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java?rev=885530&r1=885529&r2=885530&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java Mon
Nov 30 18:38:06 2009
@@ -452,6 +452,81 @@
     }
   }
 
+  public void testCopyDfsToDfsUpdateWithSkipCRC() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new Configuration();
+      cluster = new MiniDFSCluster(conf, 2, true, null);
+      final FileSystem hdfs = cluster.getFileSystem();
+      final String namenode = hdfs.getUri().toString();
+      
+      FileSystem fs = FileSystem.get(URI.create(namenode), new Configuration());
+      // Create two files of the same name, same length but different
+      // contents
+      final String testfilename = "test";
+      final String srcData = "act act act";
+      final String destData = "cat cat cat";
+      
+      if (namenode.startsWith("hdfs://")) {
+        deldir(hdfs,"/logs");
+        
+        Path srcPath = new Path("/srcdat", testfilename);
+        Path destPath = new Path("/destdat", testfilename);
+        FSDataOutputStream out = fs.create(srcPath, true);
+        out.writeUTF(srcData);
+        out.close();
+
+        out = fs.create(destPath, true);
+        out.writeUTF(destData);
+        out.close();
+        
+        // Run with -skipcrccheck option
+        ToolRunner.run(new DistCp(conf), new String[] {
+          "-p",
+          "-update",
+          "-skipcrccheck",
+          "-log",
+          namenode+"/logs",
+          namenode+"/srcdat",
+          namenode+"/destdat"});
+        
+        // File should not be overwritten
+        FSDataInputStream in = hdfs.open(destPath);
+        String s = in.readUTF();
+        System.out.println("Dest had: " + s);
+        assertTrue("Dest got over written even with skip crc",
+            s.equalsIgnoreCase(destData));
+        in.close();
+        
+        deldir(hdfs, "/logs");
+
+        // Run without the option        
+        ToolRunner.run(new DistCp(conf), new String[] {
+          "-p",
+          "-update",
+          "-log",
+          namenode+"/logs",
+          namenode+"/srcdat",
+          namenode+"/destdat"});
+        
+        // File should be overwritten
+        in = hdfs.open(destPath);
+        s = in.readUTF();
+        System.out.println("Dest had: " + s);
+
+        assertTrue("Dest did not get overwritten without skip crc",
+            s.equalsIgnoreCase(srcData));
+        in.close();
+
+        deldir(hdfs, "/destdat");
+        deldir(hdfs, "/srcdat");
+        deldir(hdfs, "/logs");
+       }
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+    
   public void testCopyDuplication() throws Exception {
     final FileSystem localfs = FileSystem.get(LOCAL_FS, new Configuration());
     try {    

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=885530&r1=885529&r2=885530&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Mon Nov 30 18:38:06
2009
@@ -97,6 +97,9 @@
     "\n-m <num_maps>          Maximum number of simultaneous copies" +
     "\n-overwrite             Overwrite destination" +
     "\n-update                Overwrite if src size different from dst size" +
+    "\n-skipcrccheck          Do not use CRC check to determine if src is " +
+    "\n                       different from dest. Relevant only if -update" +
+    "\n                       is specified" +
     "\n-f <urilist_uri>       Use list at <urilist_uri> as src list" +
     "\n-filelimit <n>         Limit the total number of files to be <= n" +
     "\n-sizelimit <n>         Limit the total size to be <= n bytes" +
@@ -134,7 +137,8 @@
     IGNORE_READ_FAILURES("-i", NAME + ".ignore.read.failures"),
     PRESERVE_STATUS("-p", NAME + ".preserve.status"),
     OVERWRITE("-overwrite", NAME + ".overwrite.always"),
-    UPDATE("-update", NAME + ".overwrite.ifnewer");
+    UPDATE("-update", NAME + ".overwrite.ifnewer"),
+    SKIPCRC("-skipcrccheck", NAME + ".skip.crc.check");
 
     final String cmd, propertyname;
 
@@ -327,7 +331,8 @@
     private Path destPath = null;
     private byte[] buffer = null;
     private JobConf job;
-
+    private boolean skipCRCCheck = false;
+    
     // stats
     private int failcount = 0;
     private int skipcount = 0;
@@ -351,7 +356,7 @@
     private boolean needsUpdate(FileStatus srcstatus,
         FileSystem dstfs, Path dstpath) throws IOException {
       return update && !sameFile(srcstatus.getPath().getFileSystem(job),
-          srcstatus, dstfs, dstpath);
+          srcstatus, dstfs, dstpath, skipCRCCheck);
     }
     
     private FSDataOutputStream create(Path f, Reporter reporter,
@@ -384,7 +389,7 @@
             throws IOException {
       if (destFileSys.exists(absdst)) {
         if (sameFile(srcstat.getPath().getFileSystem(job), srcstat,
-            destFileSys, absdst)) {
+            destFileSys, absdst, skipCRCCheck)) {
           return true;
         }
       }
@@ -622,6 +627,7 @@
       }
       update = job.getBoolean(Options.UPDATE.propertyname, false);
       overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
+      skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
       this.job = job;
     }
 
@@ -965,9 +971,15 @@
       final boolean isOverwrite = flags.contains(Options.OVERWRITE);
       final boolean isUpdate = flags.contains(Options.UPDATE);
       final boolean isDelete = flags.contains(Options.DELETE);
+      final boolean skipCRC = flags.contains(Options.SKIPCRC);
       if (isOverwrite && isUpdate) {
         throw new IllegalArgumentException("Conflicting overwrite policies");
       }
+      if (!isUpdate && skipCRC) {
+        throw new IllegalArgumentException(
+            Options.SKIPCRC.cmd + " is relevant only with the " +
+            Options.UPDATE.cmd + " option");
+      }
       if (isDelete && !isOverwrite && !isUpdate) {
         throw new IllegalArgumentException(Options.DELETE.cmd
             + " must be specified with " + Options.OVERWRITE + " or "
@@ -1171,9 +1183,11 @@
 
     //set boolean values
     final boolean update = args.flags.contains(Options.UPDATE);
+    final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
     final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE)
                               && !args.dryrun;
     jobConf.setBoolean(Options.UPDATE.propertyname, update);
+    jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
     jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
     jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
         args.flags.contains(Options.IGNORE_READ_FAILURES));
@@ -1335,7 +1349,8 @@
                 }
               }
               //skip path if the src and the dst files are the same.
-              skipPath = update && sameFile(srcfs, child, dstfs, destPath);
+              skipPath = update && 
+              	sameFile(srcfs, child, dstfs, destPath, skipCRCCheck);
               //skip path if it exceed file limit or size limit
               skipPath |= fileCount == args.filelimit
                           || byteCount + child.getLen() > args.sizelimit; 
@@ -1460,7 +1475,7 @@
    * two files are considered as the same if they have the same size.
    */
   static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus,
-      FileSystem dstfs, Path dstpath) throws IOException {
+      FileSystem dstfs, Path dstpath, boolean skipCRCCheck) throws IOException {
     FileStatus dststatus;
     try {
       dststatus = dstfs.getFileStatus(dstpath);
@@ -1473,6 +1488,11 @@
       return false;
     }
 
+    if (skipCRCCheck) {
+      LOG.debug("Skipping the CRC check");
+      return true;
+    }
+    
     //get src checksum
     final FileChecksum srccs;
     try {



Mime
View raw message