hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sur...@apache.org
Subject svn commit: r1078113 - in /hadoop/hdfs/branches/HDFS-1052: ./ src/c++/libhdfs/ src/contrib/hdfsproxy/ src/java/ src/java/org/apache/hadoop/hdfs/server/datanode/ src/java/org/apache/hadoop/hdfs/server/datanode/metrics/ src/java/org/apache/hadoop/hdfs/se...
Date Fri, 04 Mar 2011 20:06:47 GMT
Author: suresh
Date: Fri Mar  4 20:06:46 2011
New Revision: 1078113

URL: http://svn.apache.org/viewvc?rev=1078113&view=rev
Log:
Merging change r1034932 from trunk into federation

Added:
    hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java
      - copied, changed from r1034932, hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java
Modified:
    hadoop/hdfs/branches/HDFS-1052/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/CHANGES.txt
    hadoop/hdfs/branches/HDFS-1052/build.xml   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/c++/libhdfs/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/contrib/hdfsproxy/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/java/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
    hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
    hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
    hadoop/hdfs/branches/HDFS-1052/src/webapps/datanode/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/webapps/hdfs/   (props changed)
    hadoop/hdfs/branches/HDFS-1052/src/webapps/secondary/   (props changed)

Propchange: hadoop/hdfs/branches/HDFS-1052/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -1,4 +1,4 @@
 /hadoop/core/branches/branch-0.19/hdfs:713112
 /hadoop/hdfs/branches/HDFS-265:796829-820463
 /hadoop/hdfs/branches/branch-0.21:820487
-/hadoop/hdfs/trunk:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1036738,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1036738,1052823,1060619,1061067,1062020

Modified: hadoop/hdfs/branches/HDFS-1052/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/CHANGES.txt?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/CHANGES.txt (original)
+++ hadoop/hdfs/branches/HDFS-1052/CHANGES.txt Fri Mar  4 20:06:46 2011
@@ -289,6 +289,9 @@ Release 0.22.0 - Unreleased
 
     HDFS-1164. TestHdfsProxy is failing. (Todd Lipcon via cos)
 
+    HDFS-811. Add metrics, failure reporting and additional tests for HDFS-457.
+    (eli)
+
   IMPROVEMENTS
 
     HDFS-1304. Add a new unit test for HftpFileSystem.open(..).  (szetszwo)

Propchange: hadoop/hdfs/branches/HDFS-1052/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/build.xml:779102
 /hadoop/hdfs/branches/HDFS-265/build.xml:796829-820463
 /hadoop/hdfs/branches/branch-0.21/build.xml:820487
-/hadoop/hdfs/trunk/build.xml:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/build.xml:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Propchange: hadoop/hdfs/branches/HDFS-1052/src/c++/libhdfs/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -1,3 +1,3 @@
 /hadoop/core/branches/branch-0.19/mapred/src/c++/libhdfs:713112
 /hadoop/core/trunk/src/c++/libhdfs:776175-784663
-/hadoop/hdfs/trunk/src/c++/libhdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/c++/libhdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Propchange: hadoop/hdfs/branches/HDFS-1052/src/contrib/hdfsproxy/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/contrib/hdfsproxy:776175-784663
 /hadoop/hdfs/branches/HDFS-265/src/contrib/hdfsproxy:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/contrib/hdfsproxy:820487
-/hadoop/hdfs/trunk/src/contrib/hdfsproxy:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/contrib/hdfsproxy:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Propchange: hadoop/hdfs/branches/HDFS-1052/src/java/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/hdfs:776175-785643,785929-786278
 /hadoop/hdfs/branches/HDFS-265/src/java:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/java:820487
-/hadoop/hdfs/trunk/src/java:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/java:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Modified: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java Fri Mar  4 20:06:46 2011
@@ -1683,6 +1683,8 @@ public class DataNode extends Configured
     // shutdown the DN completely.
     int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR  
                                      : DatanodeProtocol.FATAL_DISK_ERROR;  
+    myMetrics.volumesFailed.inc(1);
+
     //inform NameNodes
     for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
       DatanodeProtocol nn = bpos.bpNamenode;

Propchange: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -4,4 +4,4 @@
 /hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java:776175-785643,785929-786278
 /hadoop/hdfs/branches/HDFS-265/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java:820487
-/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java:1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java:1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Modified: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java Fri Mar  4 20:06:46 2011
@@ -76,6 +76,9 @@ public class DataNodeMetrics implements 
               new MetricsTimeVaryingInt("writes_from_local_client", registry);
   public MetricsTimeVaryingInt writesFromRemoteClient = 
               new MetricsTimeVaryingInt("writes_from_remote_client", registry);
+
+  public MetricsTimeVaryingInt volumesFailed =
+    new MetricsTimeVaryingInt("volumes_failed", registry);
   
   public MetricsTimeVaryingRate readBlockOp = 
                 new MetricsTimeVaryingRate("readBlockOp", registry);

Modified: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java Fri Mar  4 20:06:46 2011
@@ -120,7 +120,6 @@ public class DatanodeDescriptor extends 
   private long lastBlocksScheduledRollTime = 0;
   private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min
   private int volumeFailures = 0;
-  
   /** 
    * When set to true, the node is not in include list and is not allowed
    * to communicate with the namenode
@@ -658,7 +657,13 @@ public class DatanodeDescriptor extends 
       return startTime;
     }
   }  // End of class DecommissioningStatus
-  
+
+  /**
+   * Increment the volume failure count.
+   */
+  public void incVolumeFailure() {
+    volumeFailures++;
+  }
   
   /**
    * Set the flag to indicate if this datanode is disallowed from communicating
@@ -671,4 +676,20 @@ public class DatanodeDescriptor extends 
   boolean isDisallowed() {
     return disallowed;
   }
+
+  /**
+   * @return number of failed volumes in the datanode.
+   */
+  public int getVolumeFailures() {
+    return volumeFailures;
+  }
+
+  /**
+   * Reset the volume failure count when a DN re-registers.
+   * @param nodeReg DatanodeID to update registration for.
+   */
+  public void updateRegInfo(DatanodeID nodeReg) {
+    super.updateRegInfo(nodeReg);
+    volumeFailures = 0;
+  }
 }

Modified: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Mar  4 20:06:46 2011
@@ -2906,6 +2906,7 @@ public class FSNamesystem implements FSC
    * The datanode will be informed of this work at the next heartbeat.
    * 
    * @return number of blocks scheduled for replication or removal.
+   * @throws IOException
    */
   public int computeDatanodeWork() throws IOException {
     int workFound = 0;
@@ -2940,8 +2941,25 @@ public class FSNamesystem implements FSC
   }
 
   /**
-   * remove a datanode descriptor
-   * @param nodeID datanode ID
+   * Update the descriptor for the datanode to reflect a volume failure.
+   * @param nodeID DatanodeID to update count for.
+   * @throws IOException
+   */
+  synchronized public void incVolumeFailure(DatanodeID nodeID)
+    throws IOException {
+    DatanodeDescriptor nodeInfo = getDatanode(nodeID);
+    if (nodeInfo != null) {
+      nodeInfo.incVolumeFailure();
+    } else {
+      NameNode.stateChangeLog.warn("BLOCK* NameSystem.incVolumeFailure: "
+                                   + nodeID.getName() + " does not exist");
+    }
+  }
+
+  /**
+   * Remove a datanode descriptor.
+   * @param nodeID datanode ID.
+   * @throws IOException
    */
   public void removeDatanode(DatanodeID nodeID) 
     throws IOException {
@@ -2960,8 +2978,8 @@ public class FSNamesystem implements FSC
   }
   
   /**
-   * remove a datanode descriptor
-   * @param nodeInfo datanode descriptor
+   * Remove a datanode descriptor.
+   * @param nodeInfo datanode descriptor.
    */
   private void removeDatanode(DatanodeDescriptor nodeInfo) {
     synchronized (heartbeats) {
@@ -3008,8 +3026,9 @@ public class FSNamesystem implements FSC
 
   /**
    * Physically remove node from datanodeMap.
-   * 
+   *
    * @param nodeID node
+   * @throws IOException
    */
   void wipeDatanode(DatanodeID nodeID) throws IOException {
     String key = nodeID.getStorageID();

Modified: hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Fri Mar  4 20:06:46 2011
@@ -1294,6 +1294,7 @@ public class NameNode implements Namenod
       return;
     }
     verifyRequest(nodeReg);
+    namesystem.incVolumeFailure(nodeReg);
     if (errorCode == DatanodeProtocol.DISK_ERROR) {
       LOG.warn("Volume failed on " + dnName); 
     } else if (errorCode == DatanodeProtocol.FATAL_DISK_ERROR) {

Propchange: hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/test/hdfs:776175-785643
 /hadoop/hdfs/branches/HDFS-265/src/test/hdfs:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/test/hdfs:820487
-/hadoop/hdfs/trunk/src/test/hdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/test/hdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Modified: hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java Fri Mar  4 20:06:46 2011
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
-
 import java.io.File;
 import java.io.FilenameFilter;
 import java.io.IOException;
@@ -27,8 +26,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -45,10 +42,16 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.net.NetUtils;
+
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.*;
 
-public class TestDataNodeVolumeFailure extends TestCase{
+/**
+ * Fine-grained testing of block files and locations after volume failure.
+ */
+public class TestDataNodeVolumeFailure {
   final private int block_size = 512;
   MiniDFSCluster cluster = null;
   int dn_num = 2;
@@ -68,7 +71,6 @@ public class TestDataNodeVolumeFailure e
 
   @Before
   public void setUp() throws Exception {
-    
     // bring up a cluster of 2
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, block_size);
@@ -77,9 +79,26 @@ public class TestDataNodeVolumeFailure e
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(dn_num).build();
     cluster.waitActive();
   }
+
+  @After
+  public void tearDown() throws Exception {
+    if(data_fail != null) {
+      data_fail.setWritable(true);
+    }
+    if(failedDir != null) {
+      failedDir.setWritable(true);
+    }
+    if(cluster != null) {
+      cluster.shutdown();
+    }
+  }
   
-  
-  
+  /*
+   * Verify the number of blocks and files are correct after volume failure,
+   * and that we can replicate to both datanodes even after a single volume
+   * failure if the configuration parameter allows this.
+   */
+  @Test
   public void testVolumeFailure() throws IOException {
     FileSystem fs = cluster.getFileSystem();
     dataDir = new File(cluster.getDataDirectory());
@@ -133,12 +152,10 @@ public class TestDataNodeVolumeFailure e
     Path fileName1 = new Path("/test1.txt");
     DFSTestUtil.createFile(fs, fileName1, filesize, repl, 1L);
     
-    
     // should be able to replicate to both nodes (2 DN, repl=2)
     DFSTestUtil.waitReplication(fs, fileName1, repl);
     System.out.println("file " + fileName1.getName() + 
         " is created and replicated");
-    
   }
   
   /**
@@ -165,10 +182,11 @@ public class TestDataNodeVolumeFailure e
       // System.out.println(bid + "->" + bl.num_files + "vs." + bl.num_locs);
       // number of physical files (1 or 2) should be same as number of datanodes
       // in the list of the block locations
-      assertEquals(bl.num_files, bl.num_locs);
+      assertEquals("Num files should match num locations",
+          bl.num_files, bl.num_locs);
     }
-    // verify we have the same number of physical blocks and stored in NN
-    assertEquals(totalReal, totalNN);
+    assertEquals("Num physical blocks should match num stored in the NN",
+        totalReal, totalNN);
 
     // now check the number of under-replicated blocks
     FSNamesystem fsn = cluster.getNamesystem();
@@ -185,7 +203,8 @@ public class TestDataNodeVolumeFailure e
         (totalReal + totalRepl) + " vs. all files blocks " + blocks_num*2);
 
     // together all the blocks should be equal to all real + all underreplicated
-    assertEquals(totalReal + totalRepl, blocks_num*repl);
+    assertEquals("Incorrect total block count",
+        totalReal + totalRepl, blocks_num * repl);
   }
   
   /**
@@ -196,13 +215,12 @@ public class TestDataNodeVolumeFailure e
    */
   private void triggerFailure(String path, long size) throws IOException {
     NameNode nn = cluster.getNameNode();
-    List<LocatedBlock> locatedBlocks = nn.getBlockLocations(path, 0, size).getLocatedBlocks();
-//    System.out.println("Number of blocks: " + locatedBlocks.size()); 
+    List<LocatedBlock> locatedBlocks =
+      nn.getBlockLocations(path, 0, size).getLocatedBlocks();
     
-    for(LocatedBlock lb : locatedBlocks) {
+    for (LocatedBlock lb : locatedBlocks) {
       DatanodeInfo dinfo = lb.getLocations()[1];
       ExtendedBlock b = lb.getBlock();
-    //  System.out.println(i++ + ". " + b.getBlockName());
       try {
         accessBlock(dinfo, lb);
       } catch (IOException e) {
@@ -219,7 +237,6 @@ public class TestDataNodeVolumeFailure e
    * @throws IOException
    */
   private boolean deteteBlocks(File dir) {
-    
     File [] fileList = dir.listFiles();
     for(File f : fileList) {
       if(f.getName().startsWith("blk_")) {
@@ -228,7 +245,6 @@ public class TestDataNodeVolumeFailure e
         
       }
     }
-    
     return true;
   }
   
@@ -301,7 +317,6 @@ public class TestDataNodeVolumeFailure e
    * @param map
    * @return
    */
-
   private int countRealBlocks(Map<String, BlockLocs> map) {
     int total = 0;
     final String bpid = cluster.getNamesystem().getBlockPoolId();
@@ -324,7 +339,7 @@ public class TestDataNodeVolumeFailure e
         //int ii = 0;
         for(String s: res) {
           // cut off "blk_-" at the beginning and ".meta" at the end
-          assertNotNull(s);
+          assertNotNull("Block file name should not be null", s);
           String bid = s.substring(s.indexOf("_")+1, s.lastIndexOf("_"));
           //System.out.println(ii++ + ". block " + s + "; id=" + bid);
           BlockLocs val = map.get(bid);
@@ -358,18 +373,4 @@ public class TestDataNodeVolumeFailure e
     );
     return res;
   }
-
-  @After
-  public void tearDown() throws Exception {
-    if(data_fail != null) {
-      data_fail.setWritable(true);
-    }
-    if(failedDir != null) {
-      failedDir.setWritable(true);
-    }
-    if(cluster != null) {
-      cluster.shutdown();
-    }
-  }
-
 }

Copied: hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java (from r1034932, hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java)
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java?p2=hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java&p1=hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java&r1=1034932&r2=1078113&rev=1078113&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java Fri Mar  4 20:06:46 2011
@@ -147,9 +147,9 @@ public class TestDataNodeVolumeFailureRe
     DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
     DFSTestUtil.waitReplication(fs, file1, (short)3);
     ArrayList<DataNode> dns = cluster.getDataNodes();
-    assertTrue("DN1 should be up", DataNode.isDatanodeUp(dns.get(0)));
-    assertTrue("DN2 should be up", DataNode.isDatanodeUp(dns.get(1)));
-    assertTrue("DN3 should be up", DataNode.isDatanodeUp(dns.get(2)));
+    assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
+    assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
+    assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());
 
     /*
      * The metrics should confirm the volume failures.
@@ -188,7 +188,7 @@ public class TestDataNodeVolumeFailureRe
     Path file2 = new Path("/test2");
     DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
     DFSTestUtil.waitReplication(fs, file2, (short)3);
-    assertTrue("DN3 should still be up", DataNode.isDatanodeUp(dns.get(2)));
+    assertTrue("DN3 should still be up", dns.get(2).isDatanodeUp());
     assertEquals("Vol3 should report 1 failure",
         1, metrics3.volumesFailed.getCurrentIntervalValue());
     live.clear();
@@ -233,7 +233,7 @@ public class TestDataNodeVolumeFailureRe
     DFSTestUtil.createFile(fs, file3, 1024, (short)3, 1L);
     DFSTestUtil.waitReplication(fs, file3, (short)2);
     // Eventually the DN should go down
-    while (DataNode.isDatanodeUp(dns.get(2))) {
+    while (dns.get(2).isDatanodeUp()) {
       Thread.sleep(1000);
     }
     // and report two failed volumes

Modified: hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java?rev=1078113&r1=1078112&r2=1078113&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java (original)
+++ hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java Fri Mar  4 20:06:46 2011
@@ -17,14 +17,11 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
-
 import java.io.DataOutputStream;
 import java.io.File;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -41,8 +38,40 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 
-/** Test if a datanode can correctly handle errors during block read/write*/
-public class TestDiskError extends TestCase {
+import org.junit.Test;
+import org.junit.Before;
+import org.junit.After;
+import static org.junit.Assert.*;
+
+/**
+ * Test that datanodes can correctly handle errors during block read/write.
+ */
+public class TestDiskError {
+
+  private FileSystem fs;
+  private MiniDFSCluster cluster;
+  private Configuration conf;
+  private String dataDir;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 512L);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+    cluster.waitActive();
+    fs = cluster.getFileSystem();
+    dataDir = cluster.getDataDirectory();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    cluster.shutdown();
+  }
+
+  /**
+   * Test to check that a DN goes down when all its volumes have failed.
+   */
+  @Test
   public void testShutdown() throws Exception {
     if (System.getProperty("os.name").startsWith("Windows")) {
       /**
@@ -53,12 +82,9 @@ public class TestDiskError extends TestC
        */
       return;
     }
-    // bring up a cluster of 3
-    Configuration conf = new HdfsConfiguration();
-    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 512L);
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    // Bring up two more datanodes
+    cluster.startDataNodes(conf, 2, true, null, null);
     cluster.waitActive();
-    FileSystem fs = cluster.getFileSystem();
     final int dnIndex = 0;
     String bpid = cluster.getNamesystem().getBlockPoolId();
     File storageDir = MiniDFSCluster.getStorageDir(dnIndex, 0);
@@ -67,8 +93,8 @@ public class TestDiskError extends TestC
     File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
     try {
       // make the data directory of the first datanode to be readonly
-      assertTrue(dir1.setReadOnly());
-      assertTrue(dir2.setReadOnly());
+      assertTrue("Couldn't chmod local vol", dir1.setReadOnly());
+      assertTrue("Couldn't chmod local vol", dir2.setReadOnly());
 
       // create files and make sure that first datanode will be down
       DataNode dn = cluster.getDataNodes().get(dnIndex);
@@ -82,108 +108,92 @@ public class TestDiskError extends TestC
       // restore its old permission
       dir1.setWritable(true);
       dir2.setWritable(true);
-      cluster.shutdown();
     }
   }
-  
+
+  /**
+   * Test that when there is a failure replicating a block the temporary
+   * and meta files are cleaned up and subsequent replication succeeds.
+   */
+  @Test
   public void testReplicationError() throws Exception {
-    // bring up a cluster of 1
-    Configuration conf = new HdfsConfiguration();
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+    // create a file of replication factor of 1
+    final Path fileName = new Path("/test.txt");
+    final int fileLen = 1;
+    DFSTestUtil.createFile(fs, fileName, 1, (short)1, 1L);
+    DFSTestUtil.waitReplication(fs, fileName, (short)1);
+
+    // get the block belonging to the created file
+    LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(
+        cluster.getNameNode(), fileName.toString(), 0, (long)fileLen);
+    assertEquals("Should only find 1 block", blocks.locatedBlockCount(), 1);
+    LocatedBlock block = blocks.get(0);
+
+    // bring up a second datanode
+    cluster.startDataNodes(conf, 1, true, null, null);
     cluster.waitActive();
-    FileSystem fs = cluster.getFileSystem();
+    final int sndNode = 1;
+    DataNode datanode = cluster.getDataNodes().get(sndNode);
     
-    try {
-      // create a file of replication factor of 1
-      final Path fileName = new Path("/test.txt");
-      final int fileLen = 1;
-      DFSTestUtil.createFile(fs, fileName, 1, (short)1, 1L);
-      DFSTestUtil.waitReplication(fs, fileName, (short)1);
-
-      // get the block belonged to the created file
-      LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(
-          cluster.getNameNode(), fileName.toString(), 0, (long)fileLen);
-      assertEquals(blocks.locatedBlockCount(), 1);
-      LocatedBlock block = blocks.get(0);
-      
-      // bring up a second datanode
-      cluster.startDataNodes(conf, 1, true, null, null);
-      cluster.waitActive();
-      final int sndNode = 1;
-      DataNode datanode = cluster.getDataNodes().get(sndNode);
-      
-      // replicate the block to the second datanode
-      InetSocketAddress target = datanode.getSelfAddr();
-      Socket s = new Socket(target.getAddress(), target.getPort());
-        //write the header.
-      DataOutputStream out = new DataOutputStream(
-          s.getOutputStream());
-
-      Sender.opWriteBlock(out, block.getBlock(), 1, 
-          BlockConstructionStage.PIPELINE_SETUP_CREATE, 
-          0L, 0L, 0L, "", null, new DatanodeInfo[0], 
-          BlockTokenSecretManager.DUMMY_TOKEN);
-
-      // write check header
-      out.writeByte( 1 );
-      out.writeInt( 512 );
-
-      out.flush();
-
-      // close the connection before sending the content of the block
-      out.close();
-      
-      // the temporary block & meta files should be deleted
-      String bpid = cluster.getNamesystem().getBlockPoolId();
-      File storageDir = MiniDFSCluster.getStorageDir(sndNode, 0);
-      File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
-      storageDir = MiniDFSCluster.getStorageDir(sndNode, 1);
-      File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
-      while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
-        Thread.sleep(100);
-      }
-      
-      // then increase the file's replication factor
-      fs.setReplication(fileName, (short)2);
-      // replication should succeed
-      DFSTestUtil.waitReplication(fs, fileName, (short)1);
-      
-      // clean up the file
-      fs.delete(fileName, false);
-    } finally {
-      cluster.shutdown();
+    // replicate the block to the second datanode
+    InetSocketAddress target = datanode.getSelfAddr();
+    Socket s = new Socket(target.getAddress(), target.getPort());
+    // write the header.
+    DataOutputStream out = new DataOutputStream(s.getOutputStream());
+
+    Sender.opWriteBlock(out, block.getBlock(), 1,
+        BlockConstructionStage.PIPELINE_SETUP_CREATE,
+        0L, 0L, 0L, "", null, new DatanodeInfo[0],
+        BlockTokenSecretManager.DUMMY_TOKEN);
+
+    // write check header
+    out.writeByte( 1 );
+    out.writeInt( 512 );
+    out.flush();
+
+    // close the connection before sending the content of the block
+    out.close();
+
+    // the temporary block & meta files should be deleted
+    String bpid = cluster.getNamesystem().getBlockPoolId();
+    File storageDir = MiniDFSCluster.getStorageDir(sndNode, 0);
+    File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
+    storageDir = MiniDFSCluster.getStorageDir(sndNode, 1);
+    File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
+    while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
+      Thread.sleep(100);
     }
+
+    // then increase the file's replication factor
+    fs.setReplication(fileName, (short)2);
+    // replication should succeed
+    DFSTestUtil.waitReplication(fs, fileName, (short)1);
+
+    // clean up the file
+    fs.delete(fileName, false);
   }
 
+  /**
+   * Check that the permissions of the local DN directories are as expected.
+   */
+  @Test
   public void testLocalDirs() throws Exception {
     Configuration conf = new Configuration();
     final String permStr = "755";
     FsPermission expected = new FsPermission(permStr);
     conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY, permStr);
-    MiniDFSCluster cluster = null;
 
-    try {
-      // Start the cluster
-      cluster = new MiniDFSCluster.Builder(conf).build();
-      cluster.waitActive();
-
-      // Check permissions on directories in 'dfs.data.dir'
-      FileSystem localFS = FileSystem.getLocal(conf);
-      for (DataNode dn : cluster.getDataNodes()) {
-        String[] dataDirs =
-          dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
-        for (String dir : dataDirs) {
-          Path dataDir = new Path(dir);
-          FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
+    // Check permissions on directories in 'dfs.data.dir'
+    FileSystem localFS = FileSystem.getLocal(conf);
+    for (DataNode dn : cluster.getDataNodes()) {
+      String[] dataDirs =
+        dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
+      for (String dir : dataDirs) {
+        Path dataDir = new Path(dir);
+        FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
           assertEquals("Permission for dir: " + dataDir + ", is " + actual +
-                           ", while expected is " + expected,
-                       expected, actual);
-        }
+              ", while expected is " + expected, expected, actual);
       }
-    } finally {
-      if (cluster != null)
-        cluster.shutdown();
     }
-
   }
 }

Propchange: hadoop/hdfs/branches/HDFS-1052/src/webapps/datanode/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/webapps/datanode:776175-784663
 /hadoop/hdfs/branches/HDFS-265/src/webapps/datanode:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/webapps/datanode:820487
-/hadoop/hdfs/trunk/src/webapps/datanode:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/webapps/datanode:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Propchange: hadoop/hdfs/branches/HDFS-1052/src/webapps/hdfs/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/webapps/hdfs:776175-784663
 /hadoop/hdfs/branches/HDFS-265/src/webapps/hdfs:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/webapps/hdfs:820487
-/hadoop/hdfs/trunk/src/webapps/hdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/webapps/hdfs:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020

Propchange: hadoop/hdfs/branches/HDFS-1052/src/webapps/secondary/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar  4 20:06:46 2011
@@ -2,4 +2,4 @@
 /hadoop/core/trunk/src/webapps/secondary:776175-784663
 /hadoop/hdfs/branches/HDFS-265/src/webapps/secondary:796829-820463
 /hadoop/hdfs/branches/branch-0.21/src/webapps/secondary:820487
-/hadoop/hdfs/trunk/src/webapps/secondary:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1052823,1060619,1061067,1062020
+/hadoop/hdfs/trunk/src/webapps/secondary:987665-1004788,1026178-1028906,1032470-1033639,1034073,1034082-1034181,1034501-1034544,1034932,1052823,1060619,1061067,1062020



Mime
View raw message