hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From szets...@apache.org
Subject svn commit: r692570 - in /hadoop/core/trunk: ./ src/core/org/apache/hadoop/fs/ src/test/org/apache/hadoop/fs/ src/tools/org/apache/hadoop/tools/
Date Fri, 05 Sep 2008 22:50:55 GMT
Author: szetszwo
Date: Fri Sep  5 15:50:54 2008
New Revision: 692570

URL: http://svn.apache.org/viewvc?rev=692570&view=rev
Log:
HADOOP-3941. Extend FileSystem API to return file-checksums.

Added:
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileChecksum.java
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/LengthFileChecksum.java
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/FilterFileSystem.java
    hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestFileSystem.java
    hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=692570&r1=692569&r2=692570&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Sep  5 15:50:54 2008
@@ -132,6 +132,9 @@
 
     HADOOP-1869. Support access times for HDFS files. (dhruba)
 
+    HADOOP-3941. Extend FileSystem API to return file-checksums.
+    (szetszwo)
+
   IMPROVEMENTS
 
     HADOOP-3908. Fuse-dfs: better error message if llibhdfs.so doesn't exist.

Added: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileChecksum.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileChecksum.java?rev=692570&view=auto
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileChecksum.java (added)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileChecksum.java Fri Sep  5 15:50:54 2008
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.io.Writable;
+
+/** An abstract class representing file checksums. */
+public abstract class FileChecksum implements Writable {
+  /** The checksum algorithm name */ 
+  public abstract String getAlgorithmName();
+
+  /** The length of the checksum in bytes */ 
+  public abstract int getLength();
+
+  /** The value of the checksum in bytes */ 
+  public abstract byte[] getBytes();
+
+  /** Return true if both the algorithms and the values are the same. */
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (other == null || !(other instanceof FileChecksum)) {
+      return false;
+    }
+
+    final FileChecksum that = (FileChecksum)other;
+    return this.getAlgorithmName().equals(that.getAlgorithmName())
+      && Arrays.equals(this.getBytes(), that.getBytes());
+  }
+  
+  /** {@inheritDoc} */
+  public int hashCode() {
+    return getAlgorithmName().hashCode() ^ Arrays.hashCode(getBytes());
+  }
+}
\ No newline at end of file

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java?rev=692570&r1=692569&r2=692570&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java Fri Sep  5 15:50:54 2008
@@ -1286,6 +1286,16 @@
   public abstract FileStatus getFileStatus(Path f) throws IOException;
 
   /**
+   * Get the checksum of a file.
+   *
+   * @param f The file path
+   * @return The checksum 
+   */
+  public FileChecksum getFileChecksum(Path f) throws IOException {
+    return new LengthFileChecksum(getFileStatus(f).getLen());
+  }
+
+  /**
    * Return a list of file status objects that corresponds to the list of paths
    * excluding those non-existent paths.
    * 

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FilterFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FilterFileSystem.java?rev=692570&r1=692569&r2=692570&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FilterFileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FilterFileSystem.java Fri Sep  5 15:50:54 2008
@@ -244,6 +244,11 @@
     return fs.getFileStatus(f);
   }
 
+  /** {@inheritDoc} */
+  public FileChecksum getFileChecksum(Path f) throws IOException {
+    return fs.getFileChecksum(f);
+  }
+
   @Override
   public Configuration getConf() {
     return fs.getConf();

Added: hadoop/core/trunk/src/core/org/apache/hadoop/fs/LengthFileChecksum.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/LengthFileChecksum.java?rev=692570&view=auto
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/LengthFileChecksum.java (added)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/LengthFileChecksum.java Fri Sep  5 15:50:54 2008
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** A concrete {@link FileChecksum} defined solely by the length of the file. */
+public class LengthFileChecksum extends FileChecksum {
+  public static final String ALGORITHM_NAME = "FILE-LENGTH";
+  
+  private long value;
+
+  /** Constructor */
+  public LengthFileChecksum() {}
+
+  public LengthFileChecksum(long value) {
+    this.value = value;
+  }
+
+  /** {@inheritDoc} */ 
+  public String getAlgorithmName() {return ALGORITHM_NAME;}
+
+  /** {@inheritDoc} */ 
+  public int getLength() {return Long.SIZE/Byte.SIZE;}
+
+  /** {@inheritDoc} */ 
+  public byte[] getBytes() {
+    final byte[] b = new byte[getLength()];
+    for(int i = 0; i < b.length; i++) {
+      b[i] = (byte)(value >>> (i*Byte.SIZE));
+    }
+    return b;
+  }
+
+  /** {@inheritDoc} */ 
+  public void readFields(DataInput in) throws IOException {
+    value = in.readLong();
+  }
+
+  /** {@inheritDoc} */ 
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(value);    
+  }
+
+  /** {@inheritDoc} */ 
+  public String toString() {
+    return getClass().getSimpleName() + ": " + value;
+  }
+}
\ No newline at end of file

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestFileSystem.java?rev=692570&r1=692569&r2=692570&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestFileSystem.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestFileSystem.java Fri Sep  5 15:50:54 2008
@@ -58,6 +58,7 @@
 
 public class TestFileSystem extends TestCase {
   private static final Log LOG = FileSystem.LOG;
+  private static final Random RAN = new Random();
 
   private static Configuration conf = new Configuration();
   private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096);
@@ -618,4 +619,38 @@
     assertTrue(map.containsKey(lowercaseCachekey2));    
 
   }
+  
+  public void testFileChecksum() throws IOException {
+    final long seed = RAN.nextLong();
+    System.out.println("seed=" + seed);
+    RAN.setSeed(seed);
+
+    final Configuration conf = new Configuration();
+    final String dir = ROOT + "/fileChecksum";
+    final LocalFileSystem fs = FileSystem.getLocal(conf);
+    final Path foo = new Path(dir, "foo");
+
+    //generate random data
+    final byte[] data = new byte[RAN.nextInt(3*1024) + 10*1024];
+    RAN.nextBytes(data);
+
+    //write data to a file
+    final FSDataOutputStream out = fs.create(foo);
+    out.write(data);
+    out.close();
+    
+    //compute checksum
+    final FileChecksum cs1 = fs.getFileChecksum(foo);
+    System.out.println("cs1=" + cs1);
+    
+    //rename the file and verify again
+    final Path bar = new Path(dir, "bar");
+    fs.rename(foo, bar);
+
+    { //verify checksum
+      final FileChecksum cs2 = fs.getFileChecksum(bar);
+      assertEquals(cs1.hashCode(), cs2.hashCode());
+      assertEquals(cs1, cs2);
+    }
+  }
 }

Modified: hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=692570&r1=692569&r2=692570&view=diff
==============================================================================
--- hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Fri Sep  5 15:50:54 2008
@@ -37,6 +37,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsShell;
@@ -335,9 +336,9 @@
      * be meaningful in this context.
      * @throws IOException 
      */
-    private boolean needsUpdate(FileStatus src, FileSystem dstfs, Path dstpath
-        ) throws IOException {
-      return update && !sameFile(src, dstfs, dstpath);
+    private boolean needsUpdate(FileSystem srcfs, Path srcpath,
+        FileSystem dstfs, Path dstpath) throws IOException {
+      return update && !sameFile(srcfs, srcpath, dstfs, dstpath);
     }
     
     private FSDataOutputStream create(Path f, Reporter reporter,
@@ -386,8 +387,10 @@
         return;
       }
 
+      final Path srcpath = srcstat.getPath();
+      final FileSystem srcfs = srcpath.getFileSystem(job);
       if (destFileSys.exists(absdst) && !overwrite
-          && !needsUpdate(srcstat, destFileSys, absdst)) {
+          && !needsUpdate(srcfs, srcpath, destFileSys, absdst)) {
         outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
         ++skipcount;
         reporter.incrCounter(Counter.SKIP, 1);
@@ -1048,7 +1051,7 @@
             else {
               //skip file if the src and the dst files are the same.
               final Path absdst = new Path(args.dst, dst);
-              skipfile = update && sameFile(child, dstfs, absdst);
+              skipfile = update && sameFile(srcfs,child.getPath(),dstfs,absdst);
               
               if (!skipfile) {
                 ++fileCount;
@@ -1134,15 +1137,15 @@
    * Check whether the src and the dst are the same.
    * Two files are considered as the same if they have the same size.
    */
-  static private boolean sameFile(FileStatus src, FileSystem dstfs, Path dstpath
-      ) throws IOException {
-    FileStatus dst = null;
+  static private boolean sameFile(FileSystem srcfs, Path srcpath,
+      FileSystem dstfs, Path dstpath) throws IOException {
     try {
-      dst = dstfs.getFileStatus(dstpath);
-    } catch (FileNotFoundException fnfe) {
+      final FileChecksum srccs = srcfs.getFileChecksum(srcpath);
+      final FileChecksum dstcs = dstfs.getFileChecksum(dstpath);
+      return srccs != null && srccs.equals(dstcs);
+    } catch(FileNotFoundException fnfe) {
       return false;
     }
-    return src.getLen() == dst.getLen();
   }
   
   /** Delete the dst files/dirs which do not exist in src */



Mime
View raw message