Author: cdouglas
Date: Thu May 15 13:55:08 2008
New Revision: 656830
URL: http://svn.apache.org/viewvc?rev=656830&view=rev
Log:
HADOOP-3013. Add corrupt block reporting to fsck. Contributed by lohit vijayarenu.
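With this change, fsck flags corrupt blocks both inline (one line per affected path) and in the corruption summary, and a filesystem with corrupt blocks is no longer reported as healthy. An illustrative run against a cluster holding one corrupt block (the path and block id here are made up, and the report is abridged) would look roughly like:

    /testCorruptBlock: CORRUPT block blk_7162739548153522810

     ********************************
     CORRUPT FILES:        1
     CORRUPT BLOCKS:       1
     ********************************
     ...
     Corrupt blocks:               1

    The filesystem under path '/' is CORRUPT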
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/java/org/apache/hadoop/dfs/NamenodeFsck.java
hadoop/core/trunk/src/test/org/apache/hadoop/dfs/TestFsck.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=656830&r1=656829&r2=656830&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu May 15 13:55:08 2008
@@ -137,6 +137,9 @@
HADOOP-3350. Add an argument to distcp to permit the user to limit the
number of maps. (cdouglas)
+ HADOOP-3013. Add corrupt block reporting to fsck.
+ (lohit vijayarenu via cdouglas)
+
OPTIMIZATIONS
HADOOP-3274. The default constructor of BytesWritable creates empty
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/NamenodeFsck.java?rev=656830&r1=656829&r2=656830&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/NamenodeFsck.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/NamenodeFsck.java Thu May 15 13:55:08 2008
@@ -184,6 +184,7 @@
}
if (res.totalFiles % 100 == 0) { out.flush(); }
int missing = 0;
+ int corrupt = 0;
long missize = 0;
int underReplicatedPerFile = 0;
int misReplicatedPerFile = 0;
@@ -191,6 +192,7 @@
int i = 0;
for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
Block block = lBlk.getBlock();
+ boolean isCorrupt = lBlk.isCorrupt();
String blkName = block.toString();
DatanodeInfo[] locs = lBlk.getLocations();
res.totalReplicas += locs.length;
@@ -199,6 +201,12 @@
res.excessiveReplicas += (locs.length - targetFileReplication);
res.numOverReplicatedBlocks += 1;
}
+ // Check if block is Corrupt
+ if (isCorrupt) {
+ corrupt++;
+ res.corruptBlocks++;
+ out.print("\n" + path + ": CORRUPT block " + block.getBlockName()+"\n");
+ }
if (locs.length >= minReplication)
res.numMinReplicatedBlocks++;
if (locs.length < targetFileReplication && locs.length > 0) {
@@ -253,8 +261,8 @@
report.append('\n');
i++;
}
- if (missing > 0) {
- if (!showFiles) {
+ if ((missing > 0) || (corrupt > 0)) {
+ if (!showFiles && (missing > 0)) {
out.print("\n" + path + ": MISSING " + missing
+ " blocks of total size " + missize + " B.");
}
@@ -493,6 +501,7 @@
private ArrayList<String> missingIds = new ArrayList<String>();
private long missingSize = 0L;
private long corruptFiles = 0L;
+ private long corruptBlocks = 0L;
private long excessiveReplicas = 0L;
private long missingReplicas = 0L;
private long numOverReplicatedBlocks = 0L;
@@ -515,7 +524,7 @@
* DFS is considered healthy if there are no missing blocks.
*/
public boolean isHealthy() {
- return missingIds.size() == 0;
+ return ((missingIds.size() == 0) && (corruptBlocks == 0));
}
/** Add a missing block name, plus its size. */
@@ -657,11 +666,16 @@
if (totalOpenFilesBlocks != 0)
res.append(" (Total open file blocks (not validated): " +
totalOpenFilesBlocks + ")");
- if (missingSize > 0) {
+ if (corruptFiles > 0) {
res.append("\n ********************************");
res.append("\n CORRUPT FILES:\t" + corruptFiles);
- res.append("\n MISSING BLOCKS:\t" + missingIds.size());
- res.append("\n MISSING SIZE:\t\t" + missingSize + " B");
+ if (missingSize > 0) {
+ res.append("\n MISSING BLOCKS:\t" + missingIds.size());
+ res.append("\n MISSING SIZE:\t\t" + missingSize + " B");
+ }
+ if (corruptBlocks > 0) {
+ res.append("\n CORRUPT BLOCKS: \t" + corruptBlocks);
+ }
res.append("\n ********************************");
}
res.append("\n Minimally replicated blocks:\t" + numMinReplicatedBlocks);
@@ -674,6 +688,7 @@
if (totalBlocks > 0) res.append(" (" + ((float) (numMisReplicatedBlocks * 100) / (float) totalBlocks) + " %)");
res.append("\n Default replication factor:\t" + replication);
res.append("\n Average block replication:\t" + getReplicationFactor());
+ res.append("\n Corrupt blocks:\t\t" + corruptBlocks);
res.append("\n Missing replicas:\t\t" + missingReplicas);
if (totalReplicas > 0) res.append(" (" + ((float) (missingReplicas * 100) / (float) totalReplicas) + " %)");
res.append("\n Number of data-nodes:\t\t" + totalDatanodes);
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/dfs/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/dfs/TestFsck.java?rev=656830&r1=656829&r2=656830&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/dfs/TestFsck.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/dfs/TestFsck.java Thu May 15 13:55:08 2008
@@ -22,6 +22,12 @@
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.io.File;
+import java.io.RandomAccessFile;
+import java.lang.Exception;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
+import java.util.Random;
import junit.framework.TestCase;
@@ -32,6 +38,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.io.IOUtils;
/**
* A JUnit test for doing fsck
@@ -206,4 +213,64 @@
if (cluster != null) { cluster.shutdown(); }
}
}
+
+ public void testCorruptBlock() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = null;
+ DFSClient dfsClient = null;
+ LocatedBlocks blocks = null;
+ int replicaCount = 0;
+ Random random = new Random();
+ String outStr = null;
+
+ MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
+ cluster.waitActive();
+ fs = cluster.getFileSystem();
+ Path file1 = new Path("/testCorruptBlock");
+ DFSTestUtil.createFile(fs, file1, 1024, (short)3, 0);
+ String block = DFSTestUtil.getFirstBlock(fs, file1).getBlockName();
+
+ // Make sure filesystem is in healthy state
+ outStr = runFsck(conf, "/");
+ System.out.println(outStr);
+ assertTrue(outStr.contains("HEALTHY"));
+
+ // corrupt replicas
+ File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
+ for (int i=0; i < 6; i++) {
+ File blockFile = new File(baseDir, "data" + (i+1) + "/current/" +
+ block);
+ if (blockFile.exists()) {
+ RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
+ FileChannel channel = raFile.getChannel();
+ String badString = "BADBAD";
+ int rand = random.nextInt((int)channel.size()/2);
+ raFile.seek(rand);
+ raFile.write(badString.getBytes());
+ raFile.close();
+ }
+ }
+ // Read the file to trigger reportBadBlocks
+ try {
+ IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf,
+ true);
+ } catch (IOException ie) {
+ // Ignore exception
+ }
+ dfsClient = new DFSClient(new InetSocketAddress("localhost",
+ cluster.getNameNodePort()), conf);
+ blocks = dfsClient.namenode.
+ getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
+ replicaCount = blocks.get(0).getLocations().length;
+ assertTrue (replicaCount == 3);
+ assertTrue (blocks.get(0).isCorrupt());
+
+ // Check if fsck reports the same
+ outStr = runFsck(conf, "/");
+ System.out.println(outStr);
+ assertTrue(outStr.contains("CORRUPT"));
+ assertTrue(outStr.contains("testCorruptBlock"));
+
+ cluster.shutdown();
+ }
}
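One point in the test worth noting: corrupting the replica files on disk is not enough by itself, because the namenode has no idea the bytes changed. The read through IOUtils.copyBytes is what surfaces the damage; the client's checksum verification fails and the bad replicas are reported back to the namenode (the reportBadBlocks path the comment mentions), after which getBlockLocations returns the block with isCorrupt() set and fsck can see it. Condensed from the test above, that trigger step is just:

    // Reading the file after corrupting every replica on disk: the client
    // hits checksum failures and reports the bad replicas to the namenode
    // before the read ultimately fails.
    try {
      IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true);
    } catch (IOException ie) {
      // Expected: all replicas of the block are corrupt.
    }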