hadoop-common-commits mailing list archives

From: cutt...@apache.org
Subject: svn commit: r557039 - in /lucene/hadoop/trunk: CHANGES.txt src/test/org/apache/hadoop/dfs/DFSTestUtil.java src/test/org/apache/hadoop/dfs/TestCrcCorruption.java
Date: Tue, 17 Jul 2007 19:56:28 GMT
Author: cutting
Date: Tue Jul 17 12:56:25 2007
New Revision: 557039

URL: http://svn.apache.org/viewvc?view=rev&rev=557039
Log:
HADOOP-1564.  Add unit tests for HDFS block-level checksums.  Contributed by Dhruba.
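To run just this new test in isolation, a minimal JUnit 3 runner sketch is shown
below. The test is normally driven through the project's Ant "test" target; the
wrapper class name here (RunTestCrcCorruption) is illustrative only and is not
part of this patch.

    // Illustrative stand-alone runner for the test class added in this commit.
    // Assumes junit and the compiled Hadoop test classes are on the classpath.
    import junit.framework.TestSuite;
    import junit.textui.TestRunner;

    public class RunTestCrcCorruption {
      public static void main(String[] args) {
        TestRunner.run(new TestSuite(org.apache.hadoop.dfs.TestCrcCorruption.class));
      }
    }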

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCrcCorruption.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/DFSTestUtil.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=557039&r1=557038&r2=557039
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jul 17 12:56:25 2007
@@ -370,6 +370,9 @@
 115. HADOOP-1400.  Make JobClient retry requests, so that clients can
      survive jobtracker problems.  (omalley via cutting)
 
+116. HADOOP-1564.  Add unit tests for HDFS block-level checksums.
+     (Dhruba Borthakur via cutting)
+
 
 Release 0.13.0 - 2007-06-08
 

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/DFSTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/DFSTestUtil.java?view=diff&rev=557039&r1=557038&r2=557039
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/DFSTestUtil.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/DFSTestUtil.java Tue Jul 17 12:56:25 2007
@@ -90,11 +90,16 @@
     int getSize() { return size; }
     long getSeed() { return seed; }
   }
+
+  void createFiles(FileSystem fs, String topdir) throws IOException {
+    createFiles(fs, topdir, (short)3);
+  }
   
   /** create nFiles with random names and directory hierarchies
-   * with random (but reproducible) data in them.
+   *  with random (but reproducible) data in them.
    */
-  void createFiles(FileSystem fs, String topdir) throws IOException {
+  void createFiles(FileSystem fs, String topdir,
+                   short replicationFactor) throws IOException {
     files = new MyFile[nFiles];
     
     for (int idx = 0; idx < nFiles; idx++) {
@@ -109,7 +114,7 @@
         throw new IOException("Mkdirs failed to create " + 
                               fPath.getParent().toString());
       }
-      FSDataOutputStream out = fs.create(fPath);
+      FSDataOutputStream out = fs.create(fPath, replicationFactor);
       byte[] toWrite = new byte[files[idx].getSize()];
       Random rb = new Random(files[idx].getSeed());
       rb.nextBytes(toWrite);
@@ -132,7 +137,7 @@
       byte[] toCompare = new byte[files[idx].getSize()];
       Random rb = new Random(files[idx].getSeed());
       rb.nextBytes(toCompare);
-      assertEquals("Cannnot read file.", toRead.length, in.read(toRead));
+      in.readFully(0, toRead);
       in.close();
       for (int i = 0; i < toRead.length; i++) {
         if (toRead[i] != toCompare[i]) {
@@ -144,6 +149,52 @@
     }
     
     return true;
+  }
+
+  void setReplication(FileSystem fs, String topdir, short value) 
+                                              throws IOException {
+    Path root = new Path(topdir);
+    for (int idx = 0; idx < nFiles; idx++) {
+      Path fPath = new Path(root, files[idx].getName());
+      fs.setReplication(fPath, value);
+    }
+  }
+
+  // waits for the replication factor of all files to reach the
+  // specified target
+  //
+  void waitReplication(FileSystem fs, String topdir, short value) 
+                                              throws IOException {
+    Path root = new Path(topdir);
+
+    // wait for the replication factor to settle down
+    while (true) {
+      boolean good = true;
+      for (int idx = 0; idx < nFiles; idx++) {
+        Path fPath = new Path(root, files[idx].getName());
+        String locs[][] = fs.getFileCacheHints(fPath, 0, Long.MAX_VALUE);
+        for (int j = 0; j < locs.length; j++) {
+          String[] loc = locs[j];
+          if (loc.length != value) {
+            System.out.println("File " + fPath + " has replication factor " +
+                               loc.length);
+            good = false;
+            break;
+          }
+        }
+        if (!good) {
+          break;
+        }
+      }
+      if (!good) {
+        try {
+          System.out.println("Waiting for replication factor to drain");
+          Thread.sleep(1000);
+        } catch (InterruptedException e) {} 
+        continue;
+      }
+      break;
+    }
   }
   
   /** delete directory and everything underneath it.*/
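The new DFSTestUtil helpers above (the createFiles overload that takes a
replication factor, setReplication, and waitReplication) are exercised by the
test added below. A minimal usage sketch, assuming the same MiniDFSCluster setup
as that test; the class name DFSTestUtilUsageSketch, the /srcdat directory and
the constructor argument values are illustrative only:

    // Sketch only: mirrors how TestCrcCorruption (added below) drives the new helpers.
    // Must live in org.apache.hadoop.dfs because the helper methods are package-private.
    package org.apache.hadoop.dfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class DFSTestUtilUsageSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Constructor arguments follow the pattern used in TestCrcCorruption below.
        DFSTestUtil util = new DFSTestUtil("UsageSketch", 10, 3, 8 * 1024);
        MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
        try {
          cluster.waitActive();
          FileSystem fs = cluster.getFileSystem();
          util.createFiles(fs, "/srcdat", (short)2);     // new overload: explicit replication
          util.waitReplication(fs, "/srcdat", (short)2); // block until both replicas exist
          util.setReplication(fs, "/srcdat", (short)1);  // ask the namenode to drop one replica
          util.cleanup(fs, "/srcdat");
        } finally {
          cluster.shutdown();
        }
      }
    }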

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCrcCorruption.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCrcCorruption.java?view=auto&rev=557039
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCrcCorruption.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCrcCorruption.java Tue Jul 17 12:56:25 2007
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.dfs;
+
+import java.io.*;
+import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
+import java.util.Random;
+import junit.framework.*;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A JUnit test for corrupted file handling.
+ * This test creates a number of files and directories with a
+ * replication factor of 2, then verifies that a client can still
+ * read each file from the remaining valid replica in spite of the
+ * following types of simulated errors:
+ *
+ *  1. Deletes the meta file on one replica
+ *  2. Truncates the meta file on one replica
+ *  3. Corrupts the meta file header on one replica
+ *  4. Corrupts a random offset and portion of the meta file
+ *  5. Swaps two meta files, i.e. the format of the meta files
+ *     is valid but their CRCs do not match their corresponding
+ *     data blocks
+ * The above tests are run for varied values of io.bytes.per.checksum
+ * and dfs.block.size, including the case where the meta file spans
+ * multiple blocks.
+ *
+ * Another portion of the test is commented out until HADOOP-1557
+ * is addressed:
+ *  1. Create a file with 2 replicas, corrupt the meta file of one
+ *     replica, decrease the replication factor from 2 to 1. Validate
+ *     that the remaining replica is the good one.
+ *  2. Create a file with 2 replicas, corrupt the meta file of one
+ *     replica, increase the replication factor of the file to 3.
+ *     Verify that the new replica was created from the non-corrupted
+ *     replica.
+ */
+public class TestCrcCorruption extends TestCase {
+  
+  public TestCrcCorruption(String testName) {
+    super(testName);
+  }
+
+  protected void setUp() throws Exception {
+  }
+
+  protected void tearDown() throws Exception {
+  }
+  
+  /** 
+   * check if DFS can handle corrupted CRC blocks
+   */
+  private void thistest(Configuration conf, DFSTestUtil util) throws Exception {
+    MiniDFSCluster cluster = null;
+    int numDataNodes = 2;
+    short replFactor = 2;
+    Random random = new Random();
+
+    try {
+      cluster = new MiniDFSCluster(conf, numDataNodes, true, null);
+      cluster.waitActive();
+      FileSystem fs = cluster.getFileSystem();
+      util.createFiles(fs, "/srcdat", replFactor);
+      util.waitReplication(fs, "/srcdat", (short)2);
+
+      // Now deliberately remove/truncate meta blocks from the first
+      // directory of the first datanode. The complete absence of a meta
+      // file prevents this datanode from sending data to another datanode.
+      // However, a client is still allowed access to this block.
+      //
+      File data_dir = new File(System.getProperty("test.build.data"),
+                               "dfs/data/data1/current");
+      assertTrue("data directory does not exist", data_dir.exists());
+      File[] blocks = data_dir.listFiles();
+      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length
> 0));
+      int num = 0;
+      for (int idx = 0; idx < blocks.length; idx++) {
+        if (blocks[idx].getName().startsWith("blk_") &&
+            blocks[idx].getName().endsWith(".meta")) {
+          num++;
+          if (num % 3 == 0) {
+            //
+            // remove .meta file
+            //
+            System.out.println("Deliberately removing file " + blocks[idx].getName());
+            assertTrue("Cannot remove file.", blocks[idx].delete());
+          } else if (num % 3 == 1) {
+            //
+            // shorten .meta file
+            //
+            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
+            FileChannel channel = file.getChannel();
+            int newsize = random.nextInt((int)channel.size()/2);
+            System.out.println("Deliberately truncating file " + 
+                               blocks[idx].getName() + 
+                               " to size " + newsize + " bytes.");
+            channel.truncate(newsize);
+            file.close();
+          } else {
+            //
+            // corrupt a few bytes of the metafile
+            //
+            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
+            FileChannel channel = file.getChannel();
+            long position = 0;
+            //
+            // The very first time, corrupt the meta header at offset 0
+            //
+            if (num != 2) {
+              position = (long)random.nextInt((int)channel.size());
+            }
+            int length = random.nextInt((int)(channel.size() - position + 1));
+            byte[] buffer = new byte[length];
+            random.nextBytes(buffer);
+            channel.write(ByteBuffer.wrap(buffer), position);
+            System.out.println("Deliberately corrupting file " + 
+                               blocks[idx].getName() + 
+                               " at offset " + position +
+                               " length " + length);
+            file.close();
+          }
+        }
+      }
+      //
+      // Now deliberately corrupt all meta blocks from the second
+      // directory of the first datanode
+      //
+      data_dir = new File(System.getProperty("test.build.data"),
+                               "dfs/data/data2/current");
+      assertTrue("data directory does not exist", data_dir.exists());
+      blocks = data_dir.listFiles();
+      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length
> 0));
+
+      int count = 0;
+      File previous = null;
+      for (int idx = 0; idx < blocks.length; idx++) {
+        if (blocks[idx].getName().startsWith("blk_") &&
+            blocks[idx].getName().endsWith(".meta")) {
+          //
+          // Move the previous metafile into the current one.
+          //
+          count++;
+          if (count % 2 == 0) {
+            System.out.println("Deliberately insertimg bad crc into files " +
+                                blocks[idx].getName() + " " + previous.getName());
+            assertTrue("Cannot remove file.", blocks[idx].delete());
+            assertTrue("Cannot corrupt meta file.", previous.renameTo(blocks[idx]));
+            assertTrue("Cannot recreate empty meta file.", previous.createNewFile());
+            previous = null;
+          } else {
+            previous = blocks[idx];
+          }
+        }
+      }
+
+      //
+      // Only one replica is possibly corrupted. The other replica should still
+      // be good. Verify.
+      //
+      assertTrue("Corrupted replicas not handled properly.",
+                 util.checkFiles(fs, "/srcdat"));
+      System.out.println("All File still have a valid replica");
+
+      //
+      // set the replication factor back to 1. This causes only one replica
+      // of each block to remain in HDFS. The check is to make sure that
+      // the corrupted replica generated above is the one that gets deleted.
+      // This test is currently disabled until HADOOP-1557 is solved.
+      //
+      util.setReplication(fs, "/srcdat", (short)1);
+      //util.waitReplication(fs, "/srcdat", (short)1);
+      //System.out.println("All Files done with removing replicas");
+      //assertTrue("Excess replicas deleted. Corrupted replicas found.",
+      //           util.checkFiles(fs, "/srcdat"));
+      System.out.println("The excess-corrupted-replica test is disabled " +
+                         " pending HADOOP-1557");
+
+      util.cleanup(fs, "/srcdat");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+  public void testCrcCorruption() throws Exception {
+    //
+    // default parameters
+    //
+    System.out.println("TestCrcCorruption with default parameters");
+    Configuration conf1 = new Configuration();
+    conf1.setInt("dfs.blockreport.intervalMsec", 3 * 1000);
+    DFSTestUtil util1 = new DFSTestUtil("TestCrcCorruption", 40, 3, 8*1024);
+    thistest(conf1, util1);
+
+    //
+    // specific parameters
+    //
+    System.out.println("TestCrcCorruption with specific parameters");
+    Configuration conf2 = new Configuration();
+    conf2.setInt("io.bytes.per.checksum", 17);
+    conf2.setInt("dfs.block.size", 34);
+    DFSTestUtil util2 = new DFSTestUtil("TestCrcCorruption", 40, 3, 400);
+    thistest(conf2, util2);
+  }
+}


