Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 28BBA200B9D for ; Thu, 13 Oct 2016 18:25:42 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 276C0160AE4; Thu, 13 Oct 2016 16:25:42 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 22E1E160AF6 for ; Thu, 13 Oct 2016 18:25:40 +0200 (CEST) Received: (qmail 4841 invoked by uid 500); 13 Oct 2016 16:25:40 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 4701 invoked by uid 99); 13 Oct 2016 16:25:40 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Oct 2016 16:25:40 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id BEF05E04AF; Thu, 13 Oct 2016 16:25:39 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: stack@apache.org To: commits@hbase.apache.org Message-Id: <0f91da914cd94db683fd1176880b6906@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hbase git commit: HBASE-16698 Performance issue: handlers stuck waiting for CountDownLatch inside WALKey#getWriteEntry under high writing workload Date: Thu, 13 Oct 2016 16:25:39 +0000 (UTC) archived-at: Thu, 13 Oct 2016 16:25:42 -0000 Repository: hbase Updated Branches: refs/heads/master c9c67d1a9 -> 9b1351448 HBASE-16698 Performance issue: handlers stuck waiting for CountDownLatch inside WALKey#getWriteEntry under high writing workload Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9b135144 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9b135144 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9b135144 Branch: refs/heads/master Commit: 9b13514483991889cd6ebe097c3c8eb0e7983e6d Parents: c9c67d1 Author: Michael Stack Authored: Thu Oct 13 09:25:23 2016 -0700 Committer: Michael Stack Committed: Thu Oct 13 09:25:23 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/regionserver/HRegion.java | 85 +++++++++++++++----- .../hbase/regionserver/wal/FSWALEntry.java | 19 +++-- .../org/apache/hadoop/hbase/wal/WALKey.java | 26 ++++++ .../hadoop/hbase/regionserver/TestHRegion.java | 7 +- 4 files changed, 108 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/9b135144/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index ccc92d1..3715ca1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -64,6 +64,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; @@ -197,6 +198,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY = "hbase.hregion.scan.loadColumnFamiliesOnDemand"; + /** Config key for using mvcc pre-assign feature for put */ + public static final String HREGION_MVCC_PRE_ASSIGN = "hbase.hregion.mvcc.preassign"; + public static final boolean DEFAULT_HREGION_MVCC_PRE_ASSIGN = true; + /** * This is the global default value for durability. All tables/mutations not * defining a durability or using USE_DEFAULT will default to this value. @@ -585,6 +590,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // that has non-default scope private final NavigableMap replicationScope = new TreeMap( Bytes.BYTES_COMPARATOR); + // flag and lock for MVCC preassign + private final boolean mvccPreAssign; + private final ReentrantLock preAssignMvccLock; /** * HRegion constructor. This constructor should only be used for testing and @@ -744,6 +752,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi false : conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE, HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE); + + // get mvcc pre-assign flag and lock + this.mvccPreAssign = conf.getBoolean(HREGION_MVCC_PRE_ASSIGN, DEFAULT_HREGION_MVCC_PRE_ASSIGN); + if (this.mvccPreAssign) { + this.preAssignMvccLock = new ReentrantLock(); + } else { + this.preAssignMvccLock = null; + } } void setHTableSpecificConf() { @@ -3215,36 +3231,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // STEP 4. Append the final edit to WAL and sync. Mutation mutation = batchOp.getMutation(firstIndex); WALKey walKey = null; + long txid; if (replay) { // use wal key from the original walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc); walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId()); - } - // Not sure what is going on here when replay is going on... does the below append get - // called for replayed edits? Am afraid to change it without test. - if (!walEdit.isEmpty()) { - if (!replay) { - // we use HLogKey here instead of WALKey directly to support legacy coprocessors. - walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), - this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, - mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, - this.getReplicationScope()); - } - // TODO: Use the doAppend methods below... complicated by the replay stuff above. + if (!walEdit.isEmpty()) { + txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true); + if (txid != 0) { + sync(txid, durability); + } + } + } else { try { - long txid = this.wal.append(this.getRegionInfo(), walKey, - walEdit, true); - if (txid != 0) sync(txid, durability); - writeEntry = walKey.getWriteEntry(); + if (!walEdit.isEmpty()) { + try { + if (this.mvccPreAssign) { + preAssignMvccLock.lock(); + writeEntry = mvcc.begin(); + } + // we use HLogKey here instead of WALKey directly to support legacy coprocessors. + walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), + this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, + mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, + this.getReplicationScope()); + if (this.mvccPreAssign) { + walKey.setPreAssignedWriteEntry(writeEntry); + } + // TODO: Use the doAppend methods below... complicated by the replay stuff above. + txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true); + } finally { + if (mvccPreAssign) { + preAssignMvccLock.unlock(); + } + } + if (txid != 0) { + sync(txid, durability); + } + if (writeEntry == null) { + // if MVCC not preassigned, wait here until assigned + writeEntry = walKey.getWriteEntry(); + } + } } catch (IOException ioe) { - if (walKey != null) mvcc.complete(walKey.getWriteEntry()); + if (walKey != null && writeEntry == null) { + // the writeEntry is not preassigned and error occurred during append or sync + mvcc.complete(walKey.getWriteEntry()); + } throw ioe; } } if (walKey == null) { - // If no walKey, then skipping WAL or some such. Being an mvcc transaction so sequenceid. + // If no walKey, then not in replay and skipping WAL or some such. Begin an MVCC transaction + // to get sequence id. writeEntry = mvcc.begin(); } @@ -3267,7 +3308,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // STEP 6. Complete mvcc. if (replay) { this.mvcc.advanceTo(batchOp.getReplaySequenceId()); - } else if (writeEntry != null/*Can be null if in replay mode*/) { + } else { + // writeEntry won't be empty if not in replay mode + assert writeEntry != null; mvcc.completeAndWait(writeEntry); writeEntry = null; } @@ -7592,9 +7635,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 49 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + + 50 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + (14 * Bytes.SIZEOF_LONG) + - 5 * Bytes.SIZEOF_BOOLEAN); + 6 * Bytes.SIZEOF_BOOLEAN); // woefully out of date - currently missing: // 1 x HashMap - coprocessorServiceHandlers http://git-wip-us.apache.org/repos/asf/hbase/blob/9b135144/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java index 72474a0..c4546f5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java @@ -112,11 +112,16 @@ class FSWALEntry extends Entry { } stamped = true; long regionSequenceId = WALKey.NO_SEQUENCE_ID; - MultiVersionConcurrencyControl mvcc = getKey().getMvcc(); - MultiVersionConcurrencyControl.WriteEntry we = null; - - if (mvcc != null) { - we = mvcc.begin(); + WALKey key = getKey(); + MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry(); + boolean preAssigned = (we != null); + if (!preAssigned) { + MultiVersionConcurrencyControl mvcc = key.getMvcc(); + if (mvcc != null) { + we = mvcc.begin(); + } + } + if (we != null) { regionSequenceId = we.getWriteNumber(); } @@ -125,7 +130,9 @@ class FSWALEntry extends Entry { CellUtil.setSequenceId(c, regionSequenceId); } } - getKey().setWriteEntry(we); + if (!preAssigned) { + key.setWriteEntry(we); + } return regionSequenceId; } http://git-wip-us.apache.org/repos/asf/hbase/blob/9b135144/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java index 0e35cbe..4f2af38 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FamilyScope; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ScopeType; import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl; +import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry; import org.apache.hadoop.hbase.regionserver.SequenceId; // imports for things that haven't moved from regionserver.wal yet. import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; @@ -92,6 +93,10 @@ public class WALKey implements SequenceId, Comparable { */ @InterfaceAudience.Private // For internal use only. public MultiVersionConcurrencyControl.WriteEntry getWriteEntry() throws InterruptedIOException { + if (this.preAssignedWriteEntry != null) { + // don't wait for seqNumAssignedLatch if writeEntry is preassigned + return this.preAssignedWriteEntry; + } try { this.sequenceIdAssignedLatch.await(); } catch (InterruptedException ie) { @@ -203,6 +208,7 @@ public class WALKey implements SequenceId, Comparable { * Set in a way visible to multiple threads; e.g. synchronized getter/setters. */ private MultiVersionConcurrencyControl.WriteEntry writeEntry; + private MultiVersionConcurrencyControl.WriteEntry preAssignedWriteEntry = null; public static final List EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList()); // visible for deprecated HLogKey @@ -731,4 +737,24 @@ public class WALKey implements SequenceId, Comparable { this.origLogSeqNum = walKey.getOrigSequenceNumber(); } } + + /** + * @return The preassigned writeEntry, if any + */ + @InterfaceAudience.Private // For internal use only. + public MultiVersionConcurrencyControl.WriteEntry getPreAssignedWriteEntry() { + return this.preAssignedWriteEntry; + } + + /** + * Preassign writeEntry + * @param writeEntry the entry to assign + */ + @InterfaceAudience.Private // For internal use only. + public void setPreAssignedWriteEntry(WriteEntry writeEntry) { + if (writeEntry != null) { + this.preAssignedWriteEntry = writeEntry; + this.sequenceId = writeEntry.getWriteNumber(); + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/9b135144/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 612d6cf..0b8eefa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -6290,8 +6290,11 @@ public class TestHRegion { @Override public Long answer(InvocationOnMock invocation) throws Throwable { WALKey key = invocation.getArgumentAt(1, WALKey.class); - MultiVersionConcurrencyControl.WriteEntry we = key.getMvcc().begin(); - key.setWriteEntry(we); + MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry(); + if (we == null) { + we = key.getMvcc().begin(); + key.setWriteEntry(we); + } return 1L; }