hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sur...@apache.org
Subject svn commit: r1400555 - in /hadoop/common/branches/branch-1: ./ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/server/namenode/
Date Sun, 21 Oct 2012 01:58:58 GMT
Author: suresh
Date: Sun Oct 21 01:58:58 2012
New Revision: 1400555

URL: http://svn.apache.org/viewvc?rev=1400555&view=rev
Log:
HDFS-4072. On file deletion remove corresponding blocks pending replications. Contributed
by Jing Zhao.

Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/PendingReplicationBlocks.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestPendingReplication.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1400555&r1=1400554&r2=1400555&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Sun Oct 21 01:58:58 2012
@@ -270,6 +270,9 @@ Release 1.2.0 - unreleased
 
     HDFS-3402. Fix failure to start secure datanodes. (Benoy Antony via suresh)
 
+    HDFS-4072. On file deletion remove corresponding blocks pending
+    replications. (Jing Zhao via suresh)
+
 Release 1.1.1 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1400555&r1=1400554&r2=1400555&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Sun Oct 21 01:58:58 2012
@@ -287,7 +287,7 @@ public class FSNamesystem implements FSC
  */
   private UnderReplicatedBlocks neededReplications = new UnderReplicatedBlocks();
   // We also store pending replication-orders.
-  private PendingReplicationBlocks pendingReplications;
+  PendingReplicationBlocks pendingReplications;
 
   public LeaseManager leaseManager = new LeaseManager(this); 
 
@@ -2212,6 +2212,8 @@ public class FSNamesystem implements FSC
           Block b = blocks.get(i);
           blocksMap.removeINode(b);
           corruptReplicas.removeFromCorruptReplicasMap(b);
+          // Remove the block from pendingReplications
+          pendingReplications.remove(b);
           addToInvalidates(b);
         }
       }
@@ -3274,7 +3276,7 @@ public class FSNamesystem implements FSC
         // Move the block-replication into a "pending" state.
         // The reason we use 'pending' is so we can retry
         // replications that fail after an appropriate amount of time.
-        pendingReplications.add(block, targets.length);
+        pendingReplications.increment(block, targets.length);
         NameNode.stateChangeLog.debug(
             "BLOCK* block " + block
             + " is moved from neededReplications to pendingReplications");
@@ -4302,7 +4304,7 @@ public class FSNamesystem implements FSC
     //
     // Modify the blocks->datanode map and node's map.
     // 
-    pendingReplications.remove(block);
+    pendingReplications.decrement(block);
     addStoredBlock(block, node, delHintNode );
     
     // decrement number of blocks scheduled to this datanode.

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/PendingReplicationBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/PendingReplicationBlocks.java?rev=1400555&r1=1400554&r2=1400555&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/PendingReplicationBlocks.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/PendingReplicationBlocks.java Sun Oct 21 01:58:58 2012
@@ -68,7 +68,7 @@ class PendingReplicationBlocks {
   /**
    * Add a block to the list of pending Replications
    */
-  void add(Block block, int numReplicas) {
+  void increment(Block block, int numReplicas) {
     synchronized (pendingReplications) {
       PendingBlockInfo found = pendingReplications.get(block);
       if (found == null) {
@@ -85,7 +85,7 @@ class PendingReplicationBlocks {
    * Decrement the number of pending replication requests
    * for this block.
    */
-  void remove(Block block) {
+  void decrement(Block block) {
     synchronized (pendingReplications) {
       PendingBlockInfo found = pendingReplications.get(block);
       if (found != null) {
@@ -97,6 +97,17 @@ class PendingReplicationBlocks {
       }
     }
   }
+  
+  /**
+   * Remove the record about the given block from pendingReplications.
+   * @param block The given block whose pending replication requests need to be
+   *              removed
+   */
+  void remove(Block block) {
+    synchronized (pendingReplications) {
+      pendingReplications.remove(block);
+    }
+  }
 
   /**
    * The total number of blocks that are undergoing replication

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestPendingReplication.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestPendingReplication.java?rev=1400555&r1=1400554&r2=1400555&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestPendingReplication.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestPendingReplication.java Sun Oct 21 01:58:58 2012
@@ -18,14 +18,26 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import junit.framework.TestCase;
-import java.lang.System;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 
 /**
- * This class tests the internals of PendingReplicationBlocks.java
+ * This class tests the internals of PendingReplicationBlocks.java,
+ * as well as how PendingReplicationBlocks acts in FSNamesystem
  */
 public class TestPendingReplication extends TestCase {
+  private static final int DFS_REPLICATION_INTERVAL = 1;
+  private static final int DFS_HEARTBEAT_INTERVAL = 15 * 60;
+  // Number of datanodes in the cluster
+  private static final int DATANODE_COUNT = 5;
+  
   public void testPendingReplication() {
     int timeout = 10;		// 10 seconds
     PendingReplicationBlocks pendingReplications;
@@ -36,7 +48,7 @@ public class TestPendingReplication exte
     //
     for (int i = 0; i < 10; i++) {
       Block block = new Block(i, i, 0);
-      pendingReplications.add(block, i);
+      pendingReplications.increment(block, i);
     }
     assertEquals("Size of pendingReplications ",
                  10, pendingReplications.size());
@@ -46,15 +58,15 @@ public class TestPendingReplication exte
     // remove one item and reinsert it
     //
     Block blk = new Block(8, 8, 0);
-    pendingReplications.remove(blk);             // removes one replica
+    pendingReplications.decrement(blk);             // removes one replica
     assertEquals("pendingReplications.getNumReplicas ",
                  7, pendingReplications.getNumReplicas(blk));
 
     for (int i = 0; i < 7; i++) {
-      pendingReplications.remove(blk);           // removes all replicas
+      pendingReplications.decrement(blk);           // removes all replicas
     }
     assertTrue(pendingReplications.size() == 9);
-    pendingReplications.add(blk, 8);
+    pendingReplications.increment(blk, 8);
     assertTrue(pendingReplications.size() == 10);
 
     //
@@ -82,7 +94,7 @@ public class TestPendingReplication exte
 
     for (int i = 10; i < 15; i++) {
       Block block = new Block(i, i, 0);
-      pendingReplications.add(block, i);
+      pendingReplications.increment(block, i);
     }
     assertTrue(pendingReplications.size() == 15);
 
@@ -111,4 +123,56 @@ public class TestPendingReplication exte
       assertTrue(timedOut[i].getBlockId() < 15);
     }
   }
+  
+  /**
+   * Test if FSNamesystem can correctly remove corresponding pending records
+   * when a file is deleted
+   * 
+   * @throws Exception
+   */
+  public void testPendingAndInvalidate() throws Exception {
+    final Configuration CONF = new Configuration();
+    CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
+    // Set the heartbeat interval to 15 min, so that no replication recovery
+    // work is done during the test
+    CONF.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
+        DFS_HEARTBEAT_INTERVAL);
+    CONF.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY,
+        DFS_REPLICATION_INTERVAL);
+    MiniDFSCluster cluster = new MiniDFSCluster(CONF, DATANODE_COUNT, true,
+        null);
+    cluster.waitActive();
+
+    FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
+    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
+    try {
+      // 1. create a file
+      Path filePath = new Path("/tmp.txt");
+      DFSTestUtil.createFile(fs, filePath, 1024, (short) 3, 0L);
+
+      // 2. mark a block as corrupt on two DataNodes
+      LocatedBlock block = NameNodeAdapter.getBlockLocations(
+          cluster.getNameNode(), filePath.toString(), 0, 1).get(0);
+      namesystem.markBlockAsCorrupt(block.getBlock(), block.getLocations()[0]);
+      namesystem.markBlockAsCorrupt(block.getBlock(), block.getLocations()[1]);
+      namesystem.computeDatanodeWork();
+      assertEquals(namesystem.getPendingReplicationBlocks(), 1L);
+      assertEquals(
+          namesystem.pendingReplications.getNumReplicas(block.getBlock()), 2);
+
+      // 3. delete the file
+      fs.delete(filePath, true);
+      // retry at most 10 times, each time sleep for 1s. Note that 10s is much
+      // less than the default pending record timeout (5~10min)
+      int retries = 10;
+      long pendingNum = namesystem.pendingReplications.size();
+      while (pendingNum != 0 && retries-- > 0) {
+        Thread.sleep(1000); // let NN do the deletion
+        pendingNum = namesystem.pendingReplications.size();
+      }
+      assertEquals(pendingNum, 0L);
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }



Mime
View raw message