hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kih...@apache.org
Subject hadoop git commit: HDFS-11817. A faulty node can cause a lease leak and NPE on accessing data. Contributed by Kihwal Lee.
Date Thu, 25 May 2017 22:22:36 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 fc6cb4b2d -> 2cb63433a


HDFS-11817. A faulty node can cause a lease leak and NPE on accessing data. Contributed by
Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2cb63433
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2cb63433
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2cb63433

Branch: refs/heads/branch-2
Commit: 2cb63433abb21cd2b74bd266b70a682caf9e2d98
Parents: fc6cb4b
Author: Kihwal Lee <kihwal@apache.org>
Authored: Thu May 25 17:21:56 2017 -0500
Committer: Kihwal Lee <kihwal@apache.org>
Committed: Thu May 25 17:21:56 2017 -0500

----------------------------------------------------------------------
 .../BlockUnderConstructionFeature.java          | 30 ++++++++++---
 .../server/blockmanagement/DatanodeManager.java |  3 +-
 .../hdfs/server/namenode/FSDirTruncateOp.java   |  2 +-
 .../hdfs/server/namenode/FSNamesystem.java      |  2 +-
 .../hdfs/server/namenode/LeaseManager.java      | 15 +++++--
 .../TestBlockUnderConstructionFeature.java      |  8 ++--
 .../namenode/TestBlockUnderConstruction.java    | 45 ++++++++++++++++++++
 .../TestCommitBlockSynchronization.java         |  2 +-
 .../namenode/ha/TestRetryCacheWithHA.java       |  9 +++-
 9 files changed, 98 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockUnderConstructionFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockUnderConstructionFeature.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockUnderConstructionFeature.java
index ddcdd0f..c43ace7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockUnderConstructionFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockUnderConstructionFeature.java
@@ -69,11 +69,24 @@ public class BlockUnderConstructionFeature {
 
   /** Set expected locations */
   public void setExpectedLocations(Block block, DatanodeStorageInfo[] targets) {
-    int numLocations = targets == null ? 0 : targets.length;
+    if (targets == null) {
+      return;
+    }
+    int numLocations = 0;
+    for (DatanodeStorageInfo target : targets) {
+      if (target != null) {
+        numLocations++;
+      }
+    }
+
     this.replicas = new ReplicaUnderConstruction[numLocations];
-    for(int i = 0; i < numLocations; i++) {
-      replicas[i] = new ReplicaUnderConstruction(block, targets[i],
-          ReplicaState.RBW);
+    int offset = 0;
+    for(int i = 0; i < targets.length; i++) {
+      // Only store non-null DatanodeStorageInfo.
+      if (targets[i] != null) {
+        replicas[i] = new ReplicaUnderConstruction(block,
+            targets[i], ReplicaState.RBW);
+      }
     }
   }
 
@@ -142,10 +155,17 @@ public class BlockUnderConstructionFeature {
    * Initialize lease recovery for this block.
    * Find the first alive data-node starting from the previous primary and
    * make it primary.
+   * @param blockInfo Block to be recovered
+   * @param recoveryId Recovery ID (new gen stamp)
+   * @param startRecovery Issue recovery command to datanode if true.
    */
-  public void initializeBlockRecovery(BlockInfo blockInfo, long recoveryId) {
+  public void initializeBlockRecovery(BlockInfo blockInfo, long recoveryId,
+      boolean startRecovery) {
     setBlockUCState(BlockUCState.UNDER_RECOVERY);
     blockRecoveryId = recoveryId;
+    if (!startRecovery) {
+      return;
+    }
     if (replicas.length == 0) {
       NameNode.blockStateChangeLog.warn("BLOCK*" +
           " BlockUnderConstructionFeature.initializeBlockRecovery:" +

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index dd96181..5eb6760 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -561,10 +561,11 @@ public class DatanodeManager {
       DatanodeID[] datanodeID, String[] storageIDs,
       String format, Object... args) throws UnregisteredNodeException {
     if (datanodeID.length != storageIDs.length) {
+      // Error for pre-2.0.0-alpha clients.
       final String err = (storageIDs.length == 0?
           "Missing storageIDs: It is likely that the HDFS client,"
           + " who made this call, is running in an older version of Hadoop"
-          + " which does not support storageIDs."
+          + "(pre-2.0.0-alpha)  which does not support storageIDs."
           : "Length mismatched: storageIDs.length=" + storageIDs.length + " != "
           ) + " datanodeID.length=" + datanodeID.length;
       throw new HadoopIllegalArgumentException(

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
index 018719a..7ff699f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
@@ -259,7 +259,7 @@ final class FSDirTruncateOp {
     }
     if (shouldRecoverNow) {
       truncatedBlockUC.getUnderConstructionFeature().initializeBlockRecovery(
-          truncatedBlockUC, newBlock.getGenerationStamp());
+          truncatedBlockUC, newBlock.getGenerationStamp(), true);
     }
 
     return newBlock;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index dc349f1..63d2af9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3203,7 +3203,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       } else if(truncateRecovery) {
         recoveryBlock.setGenerationStamp(blockRecoveryId);
       }
-      uc.initializeBlockRecovery(lastBlock, blockRecoveryId);
+      uc.initializeBlockRecovery(lastBlock, blockRecoveryId, true);
       leaseManager.renewLease(lease);
       // Cannot close file right now, since the last block requires recovery.
       // This may potentially cause infinite loop in lease recovery

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index 3cc7bd1..e2a30ce 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -380,9 +380,16 @@ public class LeaseManager {
           if (!p.startsWith("/")) {
             throw new IOException("Invalid path in the lease " + p);
           }
-          boolean completed = fsnamesystem.internalReleaseLease(
-              leaseToCheck, p, iip,
-              HdfsServerConstants.NAMENODE_LEASE_HOLDER);
+          boolean completed = false;
+          try {
+            completed = fsnamesystem.internalReleaseLease(
+                leaseToCheck, p, iip,
+                HdfsServerConstants.NAMENODE_LEASE_HOLDER);
+          } catch (IOException e) {
+            LOG.warn("Cannot release the path " + p + " in the lease "
+                + leaseToCheck + ". It will be retried.", e);
+            continue;
+          }
           if (LOG.isDebugEnabled()) {
             if (completed) {
               LOG.debug("Lease recovery for inode " + id + " is complete. " +
@@ -396,7 +403,7 @@ public class LeaseManager {
             needSync = true;
           }
         } catch (IOException e) {
-          LOG.error("Cannot release the path " + p + " in the lease "
+          LOG.warn("Removing lease with an invalid path: " + p + ","
               + leaseToCheck, e);
           removing.add(id);
         }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockUnderConstructionFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockUnderConstructionFeature.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockUnderConstructionFeature.java
index 717292e..15502c9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockUnderConstructionFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockUnderConstructionFeature.java
@@ -50,7 +50,7 @@ public class TestBlockUnderConstructionFeature {
     DFSTestUtil.resetLastUpdatesWithOffset(dd1, -3 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd2, -1 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd3, -2 * 1000);
-    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 1);
+    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 1, true);
     BlockInfo[] blockInfoRecovery = dd2.getLeaseRecoveryCommand(1);
     assertEquals(blockInfoRecovery[0], blockInfo);
 
@@ -58,7 +58,7 @@ public class TestBlockUnderConstructionFeature {
     DFSTestUtil.resetLastUpdatesWithOffset(dd1, -2 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd2, -1 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd3, -3 * 1000);
-    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 2);
+    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 2, true);
     blockInfoRecovery = dd1.getLeaseRecoveryCommand(1);
     assertEquals(blockInfoRecovery[0], blockInfo);
 
@@ -66,7 +66,7 @@ public class TestBlockUnderConstructionFeature {
     DFSTestUtil.resetLastUpdatesWithOffset(dd1, -2 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd2, -1 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd3, -3 * 1000);
-    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 3);
+    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 3, true);
     blockInfoRecovery = dd3.getLeaseRecoveryCommand(1);
     assertEquals(blockInfoRecovery[0], blockInfo);
 
@@ -75,7 +75,7 @@ public class TestBlockUnderConstructionFeature {
     DFSTestUtil.resetLastUpdatesWithOffset(dd1, -2 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd2, -1 * 1000);
     DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
-    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 3);
+    blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo, 3, true);
     blockInfoRecovery = dd3.getLeaseRecoveryCommand(1);
     assertEquals(blockInfoRecovery[0], blockInfo);
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBlockUnderConstruction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBlockUnderConstruction.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBlockUnderConstruction.java
index ab6cde3..49e79da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBlockUnderConstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBlockUnderConstruction.java
@@ -38,12 +38,16 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+
 public class TestBlockUnderConstruction {
   static final String BASE_DIR = "/test/TestBlockUnderConstruction";
   static final int BLOCK_SIZE = 8192; // same as TestFileCreation.blocksize
@@ -183,4 +187,45 @@ public class TestBlockUnderConstruction {
     // close file
     out.close();
   }
+
+  /**
+   * A storage ID can be invalid if the storage failed or the node
+   * reregisters. When the node heart-beats, the storage report in it
+   * causes storage volumes to be added back. An invalid storage ID
+   * should not cause an NPE.
+   */
+  @Test
+  public void testEmptyExpectedLocations() throws Exception {
+    final NamenodeProtocols namenode = cluster.getNameNodeRpc();
+    final FSNamesystem fsn = cluster.getNamesystem();
+    final BlockManager bm = fsn.getBlockManager();
+    final Path p = new Path(BASE_DIR, "file2.dat");
+    final String src = p.toString();
+    final FSDataOutputStream out = TestFileCreation.createFile(hdfs, p, 1);
+    writeFile(p, out, 256);
+    out.hflush();
+
+    // make sure the block is readable
+    LocatedBlocks lbs = namenode.getBlockLocations(src, 0, 256);
+    LocatedBlock lastLB = lbs.getLocatedBlocks().get(0);
+    final Block b = lastLB.getBlock().getLocalBlock();
+
+    // fake a block recovery
+    long blockRecoveryId = bm.nextGenerationStamp(false);
+    BlockUnderConstructionFeature uc = bm.getStoredBlock(b).
+        getUnderConstructionFeature();
+    uc.initializeBlockRecovery(null, blockRecoveryId, false);
+
+    try {
+      String[] storages = { "invalid-storage-id1" };
+      fsn.commitBlockSynchronization(lastLB.getBlock(), blockRecoveryId, 256L,
+          true, false, lastLB.getLocations(), storages);
+    } catch (java.lang.IllegalStateException ise) {
+       // Although a failure is expected as of now, future commit policy
+       // changes may make it not fail. This is not critical to the test.
+    }
+
+    // Invalid storage should not trigger an exception.
+    lbs = namenode.getBlockLocations(src, 0, 256);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
index d4c5924..d3b387f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
@@ -73,7 +73,7 @@ public class TestCommitBlockSynchronization {
     blockInfo.setBlockCollectionId(file.getId());
     blockInfo.setGenerationStamp(genStamp);
     blockInfo.getUnderConstructionFeature().initializeBlockRecovery(blockInfo,
-        genStamp);
+        genStamp, true);
     doReturn(blockInfo).when(file).removeLastBlock(any(Block.class));
     doReturn(true).when(file).isUnderConstruction();
     doReturn(new BlockInfoContiguous[1]).when(file).getBlocks();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2cb63433/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
index 28593b2..12fa211 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
@@ -73,6 +73,7 @@ import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
@@ -737,7 +738,13 @@ public class TestRetryCacheWithHA {
       DatanodeInfo[] newNodes = new DatanodeInfo[2];
       newNodes[0] = nodes[0];
       newNodes[1] = nodes[1];
-      String[] storageIDs = {"s0", "s1"};
+      final DatanodeManager dm = cluster.getNamesystem(0).getBlockManager()
+          .getDatanodeManager();
+      final String storageID1 = dm.getDatanode(newNodes[0]).getStorageInfos()[0]
+          .getStorageID();
+      final String storageID2 = dm.getDatanode(newNodes[1]).getStorageInfos()[0]
+          .getStorageID();
+      String[] storageIDs = {storageID1, storageID2};
       
       client.getNamenode().updatePipeline(client.getClientName(), oldBlock,
           newBlock, newNodes, storageIDs);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message