hadoop-common-commits mailing list archives

From hair...@apache.org
Subject svn commit: r732788 - in /hadoop/core/branches/branch-0.18: ./ src/hdfs/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/
Date Thu, 08 Jan 2009 18:37:55 GMT
Author: hairong
Date: Thu Jan  8 10:37:55 2009
New Revision: 732788

URL: http://svn.apache.org/viewvc?rev=732788&view=rev
Log:
Merge -r 732776:732777 from trunk to move the change of HADOOP-4910 into branch 0.18.

Added:
    hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java
Modified:
    hadoop/core/branches/branch-0.18/CHANGES.txt
    hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java
    hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java
    hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java

Modified: hadoop/core/branches/branch-0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/CHANGES.txt?rev=732788&r1=732787&r2=732788&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.18/CHANGES.txt Thu Jan  8 10:37:55 2009
@@ -133,6 +133,9 @@
    HADOOP-4971. A long (unexpected) delay at datanodes could cause subsequent
    block reports from many datanodes to be sent at the same time. (Raghu Angadi)
     
+    HADOOP-4910. NameNode should exclude corrupt replicas when choosing
+    excessive replicas to delete, to avoid data loss. (hairong)
+
 Release 0.18.2 - 2008-11-03
 
   BUG FIXES

Modified: hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java?rev=732788&r1=732787&r2=732788&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java Thu Jan  8 10:37:55 2009
@@ -2963,13 +2963,17 @@
       delNodeHint = null;
     }
     Collection<DatanodeDescriptor> nonExcess = new ArrayList<DatanodeDescriptor>();
+    Collection<DatanodeDescriptor> corruptNodes = corruptReplicas.getNodes(block);
     for (Iterator<DatanodeDescriptor> it = blocksMap.nodeIterator(block); 
          it.hasNext();) {
       DatanodeDescriptor cur = it.next();
       Collection<Block> excessBlocks = excessReplicateMap.get(cur.getStorageID());
       if (excessBlocks == null || !excessBlocks.contains(block)) {
         if (!cur.isDecommissionInProgress() && !cur.isDecommissioned()) {
-          nonExcess.add(cur);
+          // exclude corrupt replicas
+          if (corruptNodes == null || !corruptNodes.contains(cur)) {
+            nonExcess.add(cur);
+          }
         }
       }
     }
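
The hunk above is the substance of HADOOP-4910: when collecting the nodes from which an excess replica may be removed, nodes known to hold a corrupt copy of the block are skipped. A minimal, self-contained sketch of that selection rule follows (hypothetical generic types, not the actual FSNamesystem code; the decommission checks are elided for brevity):

    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;

    class ExcessReplicaSelectionSketch {
      /**
       * Collect candidate nodes for excess-replica deletion. A node is a
       * candidate only if it does not already hold an excess replica and,
       * per HADOOP-4910, is not known to hold a corrupt copy of the block.
       * Without the corrupt-node check, the namenode could delete healthy
       * copies while keeping a corrupt one, losing the block's data.
       */
      static <Node> List<Node> nonExcessCandidates(Iterable<Node> replicaHolders,
                                                   Collection<Node> excessNodes,
                                                   Collection<Node> corruptNodes) {
        List<Node> nonExcess = new ArrayList<Node>();
        for (Node cur : replicaHolders) {
          boolean excess = excessNodes != null && excessNodes.contains(cur);
          boolean corrupt = corruptNodes != null && corruptNodes.contains(cur);
          if (!excess && !corrupt) {  // the "&& !corrupt" part is the fix
            nonExcess.add(cur);
          }
        }
        return nonExcess;
      }
    }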

Modified: hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java?rev=732788&r1=732787&r2=732788&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java (original)
+++ hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java Thu Jan  8 10:37:55 2009
@@ -624,7 +624,7 @@
   /*
    * Restart a particular datanode
    */
-  synchronized boolean restartDataNode(int i) throws IOException {
+  public synchronized boolean restartDataNode(int i) throws IOException {
     DataNodeProperties dnprop = stopDataNode(i);
     if (dnprop == null) {
       return false;
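
restartDataNode was package-private; making it public lets tests bounce a datanode while keeping its on-disk storage, which the new TestOverReplicatedBlocks below relies on. A sketch of the usage pattern (MiniDFSCluster lives in the test tree, so this assumes the test classpath; configuration values are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.dfs.MiniDFSCluster;

    public class RestartDataNodeSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
        try {
          // Stop datanode 0 and start it again on the same storage
          // directories, so any on-disk state (e.g. a corrupted block
          // file) survives and is re-scanned on startup.
          boolean ok = cluster.restartDataNode(0);
          System.out.println("restarted: " + ok);
        } finally {
          cluster.shutdown();
        }
      }
    }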

Modified: hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java?rev=732788&r1=732787&r2=732788&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java (original)
+++ hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java Thu Jan  8 10:37:55 2009
@@ -141,7 +141,7 @@
     cluster.shutdown();
   }
 
-  boolean corruptReplica(String blockName, int replica) throws IOException {
+  public static boolean corruptReplica(String blockName, int replica) throws IOException {
     Random random = new Random();
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     boolean corrupted = false;
@@ -420,7 +420,7 @@
     }
   }
   
-  private void truncateReplica(String blockName, int dnIndex) throws IOException {
+  private static void truncateReplica(String blockName, int dnIndex) throws IOException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     for (int i=dnIndex*2; i<dnIndex*2+2; i++) {
       File blockFile = new File(baseDir, "data" + (i+1)+ "/current/" + 
@@ -434,7 +434,7 @@
     }
   }
   
-  private void waitForBlockDeleted(String blockName, int dnIndex) 
+  private static void waitForBlockDeleted(String blockName, int dnIndex) 
   throws IOException, InterruptedException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     File blockFile1 = new File(baseDir, "data" + (2*dnIndex+1)+ "/current/" + 
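
corruptReplica is now public static (and truncateReplica/waitForBlockDeleted static), so other tests can damage an on-disk replica without instantiating this test class; TestOverReplicatedBlocks below does exactly that. A sketch of the call (block name supplied by the caller; test classpath assumed):

    import java.io.IOException;
    import org.apache.hadoop.dfs.TestDatanodeBlockScanner;

    class CorruptReplicaSketch {
      // Overwrite part of replica 0 of the named block under
      // ${test.build.data}/dfs/data; returns false if no copy was found.
      static boolean corruptFirstCopy(String blockName) throws IOException {
        return TestDatanodeBlockScanner.corruptReplica(blockName, 0);
      }
    }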

Added: hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java?rev=732788&view=auto
==============================================================================
--- hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java (added)
+++ hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java Thu Jan  8 10:37:55 2009
@@ -0,0 +1,68 @@
+package org.apache.hadoop.dfs;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.dfs.DFSTestUtil;
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.dfs.TestDatanodeBlockScanner;
+import org.apache.hadoop.dfs.Block;
+import org.apache.hadoop.dfs.DatanodeID;
+
+import junit.framework.TestCase;
+
+public class TestOverReplicatedBlocks extends TestCase {
+  /** Tests that processOverReplicatedBlock handles corrupt replicas
+   * correctly. It makes sure corrupt replicas are not treated as valid
+   * ones, which would let the NN delete valid replicas while keeping
+   * corrupt ones.
+   */
+  public void testProcesOverReplicateBlock() throws IOException {
+    Configuration conf = new Configuration();
+    conf.setLong("dfs.blockreport.intervalMsec", 1000L);
+    conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
+    FileSystem fs = cluster.getFileSystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short)3);
+      
+      // corrupt the block on datanode 0
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
+      File scanLog = new File(System.getProperty("test.build.data"),
+          "dfs/data/data1/current/dncp_block_verification.log.curr");
+      assertTrue(scanLog.delete()); 
+      // restart the datanode so the corrupt replica will be detected
+      cluster.restartDataNode(0);
+      DFSTestUtil.waitReplication(fs, fileName, (short)2);
+      
+      final DatanodeID corruptDataNode = 
+        cluster.getDataNodes().get(2).dnRegistration;
+      final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
+      synchronized (namesystem.heartbeats) {
+        // set the live datanodes' remaining space to 0 so they will be
+        // chosen for excess-replica deletion when over-replication occurs
+        for (DatanodeDescriptor datanode : namesystem.heartbeats) {
+          if (!corruptDataNode.equals(datanode)) {
+            datanode.updateHeartbeat(100L, 100L, 0L, 0);
+          }
+        }
+        
+        // decrease the replication factor to 1; 
+        namesystem.setReplication(fileName.toString(), (short)1);
+
+        // the corrupt replica should not be chosen as the excess one;
+        // without HADOOP-4910 the number of live replicas would drop to 0
+        // and the block would be lost
+        assertEquals(1, namesystem.countNodes(block).liveReplicas());
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}


