hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jgho...@apache.org
Subject svn commit: r1065845 - in /hadoop/hdfs/trunk: ./ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
Date Mon, 31 Jan 2011 23:07:25 GMT
Author: jghoman
Date: Mon Jan 31 23:07:24 2011
New Revision: 1065845

URL: http://svn.apache.org/viewvc?rev=1065845&view=rev
Log:
HDFS-863. Potential deadlock in TestOverReplicatedBlocks. Contributed by Ken Goodhope.

Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHeartbeatHandling.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=1065845&r1=1065844&r2=1065845&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Mon Jan 31 23:07:24 2011
@@ -76,6 +76,9 @@ Trunk (unreleased changes)
 
     HDFS-1585. Fix build after HDFS-1547 (todd)
 
+    HDFS-863. Potential deadlock in TestOverReplicatedBlocks. 
+    (Ken Goodhope via jghoman)
+
 Release 0.22.0 - Unreleased
 
   NEW FEATURES

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHeartbeatHandling.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHeartbeatHandling.java?rev=1065845&r1=1065844&r2=1065845&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHeartbeatHandling.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHeartbeatHandling.java Mon Jan 31 23:07:24 2011
@@ -57,48 +57,54 @@ public class TestHeartbeatHandling exten
       final int MAX_REPLICATE_BLOCKS = 2*MAX_REPLICATE_LIMIT+REMAINING_BLOCKS;
       final DatanodeDescriptor[] ONE_TARGET = new DatanodeDescriptor[1];
 
-      synchronized (namesystem.heartbeats) {
-      for (int i=0; i<MAX_REPLICATE_BLOCKS; i++) {
-        dd.addBlockToBeReplicated(
-            new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP), ONE_TARGET);
-      }
-      DatanodeCommand[] cmds = namesystem.handleHeartbeat(
-          nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
-      assertEquals(1, cmds.length);
-      assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
-      assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
-      
-      ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
-      for (int i=0; i<MAX_INVALIDATE_BLOCKS; i++) {
-        blockList.add(new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP));
-      }
-      dd.addBlocksToBeInvalidated(blockList);
-           
-      cmds = namesystem.handleHeartbeat(
-          nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
-      assertEquals(2, cmds.length);
-      assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
-      assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
-      assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
-      assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
-      
-      cmds = namesystem.handleHeartbeat(
-          nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
-      assertEquals(2, cmds.length);
-      assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
-      assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
-      assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
-      assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
-      
-      cmds = namesystem.handleHeartbeat(
-          nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
-      assertEquals(1, cmds.length);
-      assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
-      assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
-
-      cmds = namesystem.handleHeartbeat(
-          nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
-      assertEquals(null, cmds);
+      try {
+        namesystem.writeLock();
+        synchronized (namesystem.heartbeats) {
+          for (int i=0; i<MAX_REPLICATE_BLOCKS; i++) {
+            dd.addBlockToBeReplicated(
+                new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP), ONE_TARGET);
+          }
+
+          DatanodeCommand[] cmds = namesystem.handleHeartbeat(
+              nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
+          assertEquals(1, cmds.length);
+          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
+          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
+
+          ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
+          for (int i=0; i<MAX_INVALIDATE_BLOCKS; i++) {
+            blockList.add(new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP));
+          }
+          dd.addBlocksToBeInvalidated(blockList);
+
+          cmds = namesystem.handleHeartbeat(
+              nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
+          assertEquals(2, cmds.length);
+          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
+          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
+          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
+          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
+
+          cmds = namesystem.handleHeartbeat(
+              nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
+          assertEquals(2, cmds.length);
+          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
+          assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
+          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
+          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
+
+          cmds = namesystem.handleHeartbeat(
+              nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
+          assertEquals(1, cmds.length);
+          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
+          assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
+
+          cmds = namesystem.handleHeartbeat(
+              nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), 0, 0);
+          assertEquals(null, cmds);
+        }
+      } finally {
+        namesystem.writeUnlock();
       }
     } finally {
       cluster.shutdown();

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java?rev=1065845&r1=1065844&r2=1065845&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java Mon Jan 31 23:07:24 2011
@@ -64,11 +64,18 @@ public class TestNodeCount extends TestC
       // bring down first datanode
       DatanodeDescriptor datanode = datanodes[0];
       DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
+      
       // make sure that NN detects that the datanode is down
-      synchronized (namesystem.heartbeats) {
-        datanode.setLastUpdate(0); // mark it dead
-        namesystem.heartbeatCheck();
+      try {
+        namesystem.writeLock();
+        synchronized (namesystem.heartbeats) {
+          datanode.setLastUpdate(0); // mark it dead
+          namesystem.heartbeatCheck();
+        }
+      } finally {
+        namesystem.writeUnlock();
       }
+      
       // the block will be replicated
       DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
 
@@ -100,23 +107,29 @@ public class TestNodeCount extends TestC
       // bring down non excessive datanode
       dnprop = cluster.stopDataNode(nonExcessDN.getName());
       // make sure that NN detects that the datanode is down
-      synchronized (namesystem.heartbeats) {
-        nonExcessDN.setLastUpdate(0); // mark it dead
-        namesystem.heartbeatCheck();
-      }
       
+      try {
+        namesystem.writeLock();
+        synchronized (namesystem.heartbeats) {
+          nonExcessDN.setLastUpdate(0); // mark it dead
+          namesystem.heartbeatCheck();
+        }
+      } finally {
+        namesystem.writeUnlock();
+      }
+
       // The block should be replicated
       do {
         num = namesystem.blockManager.countNodes(block);
       } while (num.liveReplicas() != REPLICATION_FACTOR);
-      
+
       // restart the first datanode
       cluster.restartDataNode(dnprop);
       cluster.waitActive();
-      
+
       // check if excessive replica is detected
       do {
-       num = namesystem.blockManager.countNodes(block);
+        num = namesystem.blockManager.countNodes(block);
       } while (num.excessReplicas() != 2);
     } finally {
       cluster.shutdown();

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java?rev=1065845&r1=1065844&r2=1065845&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java Mon Jan 31 23:07:24 2011
@@ -75,22 +75,28 @@ public class TestOverReplicatedBlocks ex
       final DatanodeID corruptDataNode = 
         cluster.getDataNodes().get(2).dnRegistration;
       final FSNamesystem namesystem = cluster.getNamesystem();
-      synchronized (namesystem.heartbeats) {
-        // set live datanode's remaining space to be 0 
-        // so they will be chosen to be deleted when over-replication occurs
-        for (DatanodeDescriptor datanode : namesystem.heartbeats) {
-          if (!corruptDataNode.equals(datanode)) {
-            datanode.updateHeartbeat(100L, 100L, 0L, 0);
+      try {
+        namesystem.writeLock();
+        synchronized (namesystem.heartbeats) {
+          // set live datanode's remaining space to be 0 
+          // so they will be chosen to be deleted when over-replication occurs
+          for (DatanodeDescriptor datanode : namesystem.heartbeats) {
+            if (!corruptDataNode.equals(datanode)) {
+              datanode.updateHeartbeat(100L, 100L, 0L, 0);
+            }
           }
-        }
-        
-        // decrease the replication factor to 1; 
-        namesystem.setReplication(fileName.toString(), (short)1);
 
-        // corrupt one won't be chosen to be excess one
-        // without 4910 the number of live replicas would be 0: block gets lost
-        assertEquals(1, namesystem.blockManager.countNodes(block).liveReplicas());
+          // decrease the replication factor to 1; 
+          namesystem.setReplication(fileName.toString(), (short)1);
+
+          // corrupt one won't be chosen to be excess one
+          // without 4910 the number of live replicas would be 0: block gets lost
+          assertEquals(1, namesystem.blockManager.countNodes(block).liveReplicas());
+        }
+      } finally {
+        namesystem.writeUnlock();
       }
+      
     } finally {
       cluster.shutdown();
     }



Mime
View raw message