Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 83CF7CEED for ; Sat, 29 Jun 2013 19:51:28 +0000 (UTC) Received: (qmail 83515 invoked by uid 500); 29 Jun 2013 19:51:28 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 83436 invoked by uid 500); 29 Jun 2013 19:51:27 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 83429 invoked by uid 99); 29 Jun 2013 19:51:27 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 29 Jun 2013 19:51:27 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 29 Jun 2013 19:51:24 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 3A074238889B; Sat, 29 Jun 2013 19:51:03 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1497992 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/regionserver/wal/ test/java/org/apache/hadoop/hbase/regionserver/wal/ Date: Sat, 29 Jun 2013 19:51:03 -0000 To: commits@hbase.apache.org From: liyin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130629195103.3A074238889B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: liyin Date: Sat Jun 29 18:18:46 2013 New Revision: 1497992 URL: http://svn.apache.org/r1497992 Log: [0.89-fb] [HBASE-8228] Change the order of locks for HRegion.updatesLock and HLog.cacheFlushLock during a cache flush Author: aaiyer Summary: We have seen cases where HLog.startCacheFlush() may block, waiting for a compaction + log roll to complete; while holding the updatesLock for a region. The waiting is fine. But, it is unacceptable to block all writes to the region if the lock cannot be granted. A long term fix to this is to get rid of the cacheFlushLock, since we no longer write the complete flush entry in the log, and it can be deprecated. For now, just change the order of locks for HRegion.updatesLock and HLog.cacheFlushLock during a cache flush Test Plan: test suite; deploy/run on shadow Reviewers: liyintang Reviewed By: liyintang CC: hbase-eng@, adela Differential Revision: https://phabricator.fb.com/D867128 Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1497992&r1=1497991&r2=1497992&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Sat Jun 29 18:18:46 2013 @@ -1386,6 +1386,12 @@ public class HRegion implements HeapSize // rows then) status.setStatus("Obtaining lock to block concurrent updates"); long t0, t1; + + if (wal != null) { + // We will ask the WAL to avoid rolling the log, until the flush is done. + // This may stall. So, doing this before we get the updatesLock. + wal.startCacheFlush(); + } this.updatesLock.writeLock().lock(); t0 = EnvironmentEdgeManager.currentTimeMillis(); status.setStatus("Preparing to flush by snapshotting stores"); @@ -1395,7 +1401,7 @@ public class HRegion implements HeapSize stores.size()); try { sequenceId = (wal == null)? myseqid : - wal.startCacheFlush(this.regionInfo.getRegionName()); + wal.getStartCacheFlushSeqNum(this.regionInfo.getRegionName()); completeSequenceId = this.getCompleteCacheFlushSequenceId(sequenceId); for (Store s : stores.values()) { storeFlushers.add(s.getStoreFlusher(completeSequenceId)); Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1497992&r1=1497991&r2=1497992&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Sat Jun 29 18:18:46 2013 @@ -1335,13 +1335,19 @@ public class HLog implements Syncable { return outputfiles.size(); } + /** - * By acquiring a log sequence ID, we can allow log messages to continue while - * we flush the cache. - * * Acquire a lock so that we do not roll the log between the start and * completion of a cache-flush. Otherwise the log-seq-id for the flush will * not appear in the correct logfile. + */ + public void startCacheFlush() { + this.cacheFlushLock.readLock().lock(); + } + + /** + * By acquiring a log sequence ID, we can allow log messages to continue while + * we flush the cache. * * In this method, by removing the entry in firstSeqWritten for the region * being flushed we ensure that the next edit inserted in this region will be @@ -1354,8 +1360,7 @@ public class HLog implements Syncable { * @see #completeCacheFlush(byte[], byte[], long, boolean) * @see #abortCacheFlush() */ - public long startCacheFlush(final byte [] regionName) { - this.cacheFlushLock.readLock().lock(); + public long getStartCacheFlushSeqNum(final byte [] regionName) { if (this.firstSeqWrittenInSnapshotMemstore.containsKey(regionName)) { LOG.warn("Requested a startCacheFlush while firstSeqWrittenInSnapshotMemstore still" + " contains " + Bytes.toString(regionName) + " . Did the previous flush fail?" Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java?rev=1497992&r1=1497991&r2=1497992&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java Sat Jun 29 18:18:46 2013 @@ -461,7 +461,8 @@ public class TestHLog { row,Bytes.toBytes(Bytes.toString(row) + "1"), false); final byte [] regionName = info.getRegionName(); log.append(info, tableName, cols, System.currentTimeMillis()); - long logSeqId = log.startCacheFlush(info.getRegionName()); + log.startCacheFlush(); + long logSeqId = log.getStartCacheFlushSeqNum(info.getRegionName()); log.completeCacheFlush(regionName, tableName, logSeqId, info.isMetaRegion()); log.close(); Path filename = log.computeFilename(); @@ -519,7 +520,8 @@ public class TestHLog { HRegionInfo hri = new HRegionInfo(new HTableDescriptor(tableName), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW); log.append(hri, tableName, cols, System.currentTimeMillis()); - long logSeqId = log.startCacheFlush(hri.getRegionName()); + log.startCacheFlush(); + long logSeqId = log.getStartCacheFlushSeqNum(hri.getRegionName()); log.completeCacheFlush(hri.getRegionName(), tableName, logSeqId, false); log.close(); Path filename = log.computeFilename(); @@ -583,7 +585,8 @@ public class TestHLog { // Flush the first region, we expect to see the first two files getting // archived addEdits(log, hri, tableName, 1); - long seqId = log.startCacheFlush(hri.getRegionName()); + log.startCacheFlush(); + long seqId = log.getStartCacheFlushSeqNum(hri.getRegionName()); log.completeCacheFlush(hri.getRegionName(), tableName, seqId, false); log.rollWriter(); assertEquals(2, log.getNumLogFiles()); @@ -591,7 +594,8 @@ public class TestHLog { // Flush the second region, which removes all the remaining output files // since the oldest was completely flushed. addEdits(log, hri2, tableName2, 1); - seqId = log.startCacheFlush(hri2.getRegionName()); + log.startCacheFlush(); + seqId = log.getStartCacheFlushSeqNum(hri2.getRegionName()); log.completeCacheFlush(hri2.getRegionName(), tableName2, seqId, false); log.rollWriter(); assertEquals(0, log.getNumLogFiles()); Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java?rev=1497992&r1=1497991&r2=1497992&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java Sat Jun 29 18:18:46 2013 @@ -420,7 +420,8 @@ public class TestWALReplay { } // Add a cache flush, shouldn't have any effect - long logSeqId = wal.startCacheFlush(regionName); + wal.startCacheFlush(); + long logSeqId = wal.getStartCacheFlushSeqNum(regionName); wal.completeCacheFlush(regionName, tableName, logSeqId, hri.isMetaRegion()); // Add an edit to another family, should be skipped. @@ -646,4 +647,4 @@ public class TestWALReplay { HBaseTestingUtility.setMaxRecoveryErrorCount(wal.getOutputStream(), 1); return wal; } -} \ No newline at end of file +}