From: eli@apache.org
To: hdfs-commits@hadoop.apache.org
Reply-To: hdfs-dev@hadoop.apache.org
Subject: svn commit: r1381472 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/datanode/ src/test/java/org/apache/hadoop/hdfs/server/datanode/
Date: Thu, 06 Sep 2012 06:19:30 -0000
Message-Id: <20120906061930.C165523888E3@eris.apache.org>

Author: eli
Date: Thu Sep 6 06:19:30 2012
New Revision: 1381472

URL: http://svn.apache.org/viewvc?rev=1381472&view=rev
Log:
HDFS-3828. Block Scanner rescans blocks too frequently. Contributed by Andy Isaacson

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMultipleNNDataBlockScanner.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1381472&r1=1381471&r2=1381472&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Thu Sep 6 06:19:30 2012
@@ -725,6 +725,9 @@ Branch-2 ( Unreleased changes )
     (Vinay via umamahesh)
 
     HDFS-1490. TransferFSImage should timeout (Dmytro Molkov and Vinay via todd)
+
+    HDFS-3828. Block Scanner rescans blocks too frequently.
+    (Andy Isaacson via eli)
 
   BREAKDOWN OF HDFS-3042 SUBTASKS
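Context for the patch below: DataBlockScanner's run loop wakes every five seconds, so before this change an otherwise-idle BlockPoolSliceScanner could re-enter its scan path (and roll its verification logs) on each wakeup. The fix adds a guard that skips a pass when the period's byte budget is spent and the period has not yet elapsed. A minimal standalone sketch of that condition (simplified names, not the actual class):

    // Simplified sketch of the guard added to BlockPoolSliceScanner below;
    // the field names mirror the patch, but this is not the real class.
    class ScanPeriodGate {
      private long bytesLeft;          // bytes still budgeted for this scan period
      private long currentPeriodStart; // millis when the current period began
      private long scanPeriod;         // period length in millis

      synchronized boolean workRemainingInCurrentPeriod() {
        // No bytes left to scan and the period is still running: skip.
        return !(bytesLeft <= 0
            && System.currentTimeMillis() < currentPeriodStart + scanPeriod);
      }

      void scanBlockPoolSlice() {
        if (!workRemainingInCurrentPeriod()) {
          return; // avoid a redundant rescan on this wakeup
        }
        // ... scan blocks, decrementing bytesLeft ...
      }
    }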
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java?rev=1381472&r1=1381471&r2=1381472&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java Thu Sep 6 06:19:30 2012
@@ -51,6 +51,8 @@ import org.apache.hadoop.hdfs.util.DataT
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Time;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Scans the block files under a block pool and verifies that the
  * files are not corrupt.
@@ -255,6 +257,11 @@ class BlockPoolSliceScanner {
     }
   }
 
+  @VisibleForTesting
+  long getTotalScans() {
+    return totalScans;
+  }
+
   /** @return the last scan time for the block pool. */
   long getLastScanTime() {
     return lastScanTime.get();
@@ -563,7 +570,24 @@ class BlockPoolSliceScanner {
     currentPeriodStart = Time.now();
   }
 
+  private synchronized boolean workRemainingInCurrentPeriod() {
+    if (bytesLeft <= 0 && Time.now() < currentPeriodStart + scanPeriod) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
+                  currentPeriodStart + ", period=" + scanPeriod + ", now=" +
+                  Time.now() + " " + blockPoolId);
+      }
+      return false;
+    } else {
+      return true;
+    }
+  }
+
   void scanBlockPoolSlice() {
+    if (!workRemainingInCurrentPeriod()) {
+      return;
+    }
+
     // Create a new processedBlocks structure
     processedBlocks = new HashMap<Long, Integer>();
     if (!assignInitialVerificationTimes()) {
@@ -608,14 +632,14 @@ class BlockPoolSliceScanner {
       LOG.warn("RuntimeException during BlockPoolScanner.scan()", e);
       throw e;
     } finally {
-      cleanUp();
+      rollVerificationLogs();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Done scanning block pool: " + blockPoolId);
       }
     }
   }
 
-  private synchronized void cleanUp() {
+  private synchronized void rollVerificationLogs() {
     if (verificationLog != null) {
       try {
         verificationLog.logs.roll();
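Both scanner classes in this commit also start importing Guava's @VisibleForTesting for the new accessors. The annotation is documentation only: it marks a member that is package-private solely so tests in the same package can reach it. A generic illustration of the pattern (hypothetical class, not from the patch):

    import com.google.common.annotations.VisibleForTesting;

    class Worker {
      private long opsCompleted;

      void doWork() { opsCompleted++; }

      // Package-private plus @VisibleForTesting: reachable from tests in
      // the same package, while flagged as not part of the public API.
      @VisibleForTesting
      long getOpsCompleted() { return opsCompleted; }
    }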
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java?rev=1381472&r1=1381471&r2=1381472&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java Thu Sep 6 06:19:30 2012
@@ -34,6 +34,8 @@ import org.apache.hadoop.hdfs.protocol.E
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * DataBlockScanner manages block scanning for all the block pools. For each
  * block pool a {@link BlockPoolSliceScanner} is created which runs in a separate
@@ -47,6 +49,8 @@ public class DataBlockScanner implements
   private final FsDatasetSpi<? extends FsVolumeSpi> dataset;
   private final Configuration conf;
 
+  static final int SLEEP_PERIOD_MS = 5 * 1000;
+
   /**
    * Map to find the BlockPoolScanner for a given block pool id. This is updated
    * when a BPOfferService becomes alive or dies.
@@ -68,10 +72,10 @@ public class DataBlockScanner implements
     String currentBpId = "";
     boolean firstRun = true;
     while (datanode.shouldRun && !Thread.interrupted()) {
-      //Sleep everytime except in the first interation.
+      //Sleep everytime except in the first iteration.
       if (!firstRun) {
         try {
-          Thread.sleep(5000);
+          Thread.sleep(SLEEP_PERIOD_MS);
         } catch (InterruptedException ex) {
           // Interrupt itself again to set the interrupt status
           blockScannerThread.interrupt();
@@ -103,7 +107,7 @@ public class DataBlockScanner implements
     while ((getBlockPoolSetSize() < datanode.getAllBpOs().length)
         || (getBlockPoolSetSize() < 1)) {
       try {
-        Thread.sleep(5000);
+        Thread.sleep(SLEEP_PERIOD_MS);
       } catch (InterruptedException e) {
         blockScannerThread.interrupt();
         return;
@@ -249,7 +253,7 @@ public class DataBlockScanner implements
     LOG.info("Removed bpid="+blockPoolId+" from blockPoolScannerMap");
   }
 
-  // This method is used for testing
+  @VisibleForTesting
   long getBlocksScannedInLastRun(String bpid) throws IOException {
     BlockPoolSliceScanner bpScanner = getBPScanner(bpid);
     if (bpScanner == null) {
@@ -259,6 +263,16 @@ public class DataBlockScanner implements
     }
   }
 
+  @VisibleForTesting
+  long getTotalScans(String bpid) throws IOException {
+    BlockPoolSliceScanner bpScanner = getBPScanner(bpid);
+    if (bpScanner == null) {
+      throw new IOException("Block Pool: "+bpid+" is not running");
+    } else {
+      return bpScanner.getTotalScans();
+    }
+  }
+
   public void start() {
     blockScannerThread = new Thread(this);
     blockScannerThread.setDaemon(true);
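The tests below poll the new counters in SLEEP_PERIOD_MS steps with a bounded retry count, so a scanner that never reaches the expected values fails the assertion instead of hanging the run. The same pattern, distilled into a hypothetical helper (not part of the patch):

    // Hypothetical helper showing the bounded-polling shape used by
    // waitAndScanBlocks() in the test below.
    final class CounterPoll {
      interface LongProbe { long get() throws Exception; }

      static void awaitValue(LongProbe probe, long expected,
                             long stepMs, int maxSteps) throws Exception {
        // Sleep in fixed steps until the counter matches or the retry
        // budget is exhausted; the caller asserts on the final value.
        for (int i = 0; i < maxSteps && probe.get() != expected; i++) {
          Thread.sleep(stepMs);
        }
      }
    }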
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMultipleNNDataBlockScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMultipleNNDataBlockScanner.java?rev=1381472&r1=1381471&r2=1381472&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMultipleNNDataBlockScanner.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMultipleNNDataBlockScanner.java Thu Sep 6 06:19:30 2012
@@ -20,8 +20,11 @@ package org.apache.hadoop.hdfs.server.da
 
 import java.io.IOException;
 
+import junit.framework.Assert;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -31,7 +34,13 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner;
+import static org.apache.hadoop.hdfs.server.datanode.DataBlockScanner.SLEEP_PERIOD_MS;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
 import org.junit.Test;
+import org.junit.Ignore;
+import static org.junit.Assert.fail;
 
 public class TestMultipleNNDataBlockScanner {
@@ -166,4 +175,75 @@ public class TestMultipleNNDataBlockScan
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void test2NNBlockRescanInterval() throws IOException {
+    ((Log4JLogger)BlockPoolSliceScanner.LOG).getLogger().setLevel(Level.ALL);
+    Configuration conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3))
+        .build();
+
+    try {
+      FileSystem fs = cluster.getFileSystem(1);
+      Path file2 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file2, 30, (short) 1, 0);
+
+      fs = cluster.getFileSystem(0);
+      Path file1 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file1, 30, (short) 1, 0);
+      for (int i = 0; i < 8; i++) {
+        LOG.info("Verifying that the blockscanner scans exactly once");
+        waitAndScanBlocks(1, 1);
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * HDFS-3828: DN rescans blocks too frequently
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testBlockRescanInterval() throws IOException {
+    ((Log4JLogger)BlockPoolSliceScanner.LOG).getLogger().setLevel(Level.ALL);
+    Configuration conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf).build();
+
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path file1 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file1, 30, (short) 1, 0);
+      for (int i = 0; i < 4; i++) {
+        LOG.info("Verifying that the blockscanner scans exactly once");
+        waitAndScanBlocks(1, 1);
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  void waitAndScanBlocks(long scansLastRun, long scansTotal)
+      throws IOException {
+    // DataBlockScanner will run for every 5 seconds so we are checking for
+    // every 5 seconds
+    int n = 5;
+    String bpid = cluster.getNamesystem(0).getBlockPoolId();
+    DataNode dn = cluster.getDataNodes().get(0);
+    long blocksScanned, total;
+    do {
+      try {
+        Thread.sleep(SLEEP_PERIOD_MS);
+      } catch (InterruptedException e) {
+        fail("Interrupted: " + e);
+      }
+      blocksScanned = dn.blockScanner.getBlocksScannedInLastRun(bpid);
+      total = dn.blockScanner.getTotalScans(bpid);
+      LOG.info("bpid = " + bpid + " blocksScanned = " + blocksScanned +
+          " total=" + total);
+    } while (n-- > 0 && (blocksScanned != scansLastRun || scansTotal != total));
+    Assert.assertEquals(scansTotal, total);
+    Assert.assertEquals(scansLastRun, blocksScanned);
+  }
 }
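To run just this test class from a trunk checkout, a Maven invocation along these lines should work (module path per the file list above; exact flags depend on the local build setup):

    mvn test -Dtest=TestMultipleNNDataBlockScanner -pl hadoop-hdfs-project/hadoop-hdfs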