hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From weic...@apache.org
Subject hadoop git commit: HDFS-8224. Schedule a block for scanning if its metadata file is corrupt. Contributed by Rushabh S Shah.
Date Wed, 10 Aug 2016 18:42:05 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 d46f5ca7e -> 753edc493


HDFS-8224. Schedule a block for scanning if its metadata file is corrupt. Contributed by Rushabh
S Shah.

(cherry picked from commit 8efd4959f3fd48fe281aa26a46668071461dee8b)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/753edc49
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/753edc49
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/753edc49

Branch: refs/heads/branch-2.8
Commit: 753edc4931d9e878058124b51ef7feb192a201dd
Parents: d46f5ca
Author: Wei-Chiu Chuang <weichiu@apache.org>
Authored: Wed Aug 10 11:34:28 2016 -0700
Committer: Wei-Chiu Chuang <weichiu@apache.org>
Committed: Wed Aug 10 11:39:12 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/util/DataChecksum.java    |  4 +-
 .../util/InvalidChecksumSizeException.java      | 32 ++++++++++++
 .../hadoop/hdfs/server/datanode/DataNode.java   | 20 ++++++--
 .../hdfs/server/datanode/TestDiskError.java     | 54 ++++++++++++++++++++
 4 files changed, 105 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/753edc49/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
index faac587..7862404 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
@@ -122,8 +122,8 @@ public class DataChecksum implements Checksum {
     int bpc = in.readInt();
     DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
     if ( summer == null ) {
-      throw new IOException( "Could not create DataChecksum of type " +
-                             type + " with bytesPerChecksum " + bpc );
+      throw new InvalidChecksumSizeException("Could not create DataChecksum "
+          + "of type " + type + " with bytesPerChecksum " + bpc);
     }
     return summer;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/753edc49/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InvalidChecksumSizeException.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InvalidChecksumSizeException.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InvalidChecksumSizeException.java
new file mode 100644
index 0000000..b114c75
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InvalidChecksumSizeException.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.io.IOException;
+/**
+ * Thrown when bytesPerChecksun field in the meta file is less than
+ * or equal to 0 or type is invalid.
+ **/
+public class InvalidChecksumSizeException extends IOException {
+
+  private static final long serialVersionUID = 1L;
+
+  public InvalidChecksumSizeException(String s) {
+    super(s);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/753edc49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index cd8212f..9aef56e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -200,6 +200,7 @@ import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.DiskChecker;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.InvalidChecksumSizeException;
 import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.ServicePlugin;
 import org.apache.hadoop.util.StringUtils;
@@ -341,7 +342,7 @@ public class DataNode extends ReconfigurableBase
   BlockPoolTokenSecretManager blockPoolTokenSecretManager;
   private boolean hasAnyBlockPoolRegistered = false;
   
-  private final BlockScanner blockScanner;
+  private  BlockScanner blockScanner;
   private DirectoryScanner directoryScanner = null;
   
   /** Activated plug-ins. */
@@ -2081,7 +2082,8 @@ public class DataNode extends ReconfigurableBase
     LOG.warn(msg);
   }
 
-  private void transferBlock(ExtendedBlock block, DatanodeInfo[] xferTargets,
+  @VisibleForTesting
+  void transferBlock(ExtendedBlock block, DatanodeInfo[] xferTargets,
       StorageType[] xferTargetStorageTypes) throws IOException {
     BPOfferService bpos = getBPOSForBlock(block);
     DatanodeRegistration bpReg = getDNRegistrationForBP(block.getBlockPoolId());
@@ -2368,6 +2370,13 @@ public class DataNode extends ReconfigurableBase
           metrics.incrBlocksReplicated();
         }
       } catch (IOException ie) {
+        if (ie instanceof InvalidChecksumSizeException) {
+          // Add the block to the front of the scanning queue if metadata file
+          // is corrupt. We already add the block to front of scanner if the
+          // peer disconnects.
+          LOG.info("Adding block: " + b + " for scanning");
+          blockScanner.markSuspectBlock(data.getVolume(b).getStorageID(), b);
+        }
         LOG.warn(bpReg + ":Failed to transfer " + b + " to " +
             targets[0] + " got ", ie);
         // check if there are any disk problem
@@ -3285,4 +3294,9 @@ public class DataNode extends ReconfigurableBase
   ScheduledThreadPoolExecutor getMetricsLoggerTimer() {
     return metricsLoggerTimer;
   }
-}
+
+  @VisibleForTesting
+  void setBlockScanner(BlockScanner blockScanner) {
+    this.blockScanner = blockScanner;
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/753edc49/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
index 1c5f6cd..9820a12 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -37,11 +39,13 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
 import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@@ -50,6 +54,7 @@ import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 /**
  * Test that datanodes can correctly handle errors during block read/write.
@@ -226,4 +231,53 @@ public class TestDiskError {
     long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
     assertTrue("Disk Error check is not performed within  " + dataNode.checkDiskErrorInterval
+  "  ms", ((Time.monotonicNow()-lastDiskErrorCheck) < (dataNode.checkDiskErrorInterval
+ slackTime)));
   }
+
+  @Test
+  public void testDataTransferWhenBytesPerChecksumIsZero() throws IOException {
+    DataNode dn0 = cluster.getDataNodes().get(0);
+    // Make a mock blockScanner class and return false whenever isEnabled is
+    // called on blockScanner
+    BlockScanner mockScanner = Mockito.mock(BlockScanner.class);
+    Mockito.when(mockScanner.isEnabled()).thenReturn(false);
+    dn0.setBlockScanner(mockScanner);
+    Path filePath = new Path("test.dat");
+    FSDataOutputStream out = fs.create(filePath, (short) 1);
+    out.write(1);
+    out.hflush();
+    out.close();
+    // Corrupt the metadata file. Insert all 0's in the type and
+    // bytesPerChecksum files of the metadata header.
+    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
+    File metadataFile = cluster.getBlockMetadataFile(0, block);
+    RandomAccessFile raFile = new RandomAccessFile(metadataFile, "rw");
+    raFile.seek(2);
+    raFile.writeByte(0);
+    raFile.writeInt(0);
+    raFile.close();
+    String datanodeId0 = dn0.getDatanodeUuid();
+    LocatedBlock lb = DFSTestUtil.getAllBlocks(fs, filePath).get(0);
+    String storageId = lb.getStorageIDs()[0];
+    cluster.startDataNodes(conf, 1, true, null, null);
+    DataNode dn1 = null;
+    for (int i = 0; i < cluster.getDataNodes().size(); i++) {
+      if (!cluster.getDataNodes().get(i).equals(datanodeId0)) {
+        dn1 = cluster.getDataNodes().get(i);
+        break;
+      }
+    }
+    DatanodeDescriptor dnd1 =
+        NameNodeAdapter.getDatanode(cluster.getNamesystem(),
+            dn1.getDatanodeId());
+
+    dn0.transferBlock(block, new DatanodeInfo[]{dnd1},
+        new StorageType[]{StorageType.DISK});
+    // Sleep for 1 second so the DataTrasnfer daemon can start transfer.
+    try {
+      Thread.sleep(1000);
+    } catch (InterruptedException e) {
+      // Do nothing
+    }
+    Mockito.verify(mockScanner).markSuspectBlock(Mockito.eq(storageId),
+        Mockito.eq(block));
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message