Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 2E628200C72 for ; Thu, 27 Apr 2017 22:21:08 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 2CF28160BA7; Thu, 27 Apr 2017 20:21:08 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 4FC31160B9E for ; Thu, 27 Apr 2017 22:21:07 +0200 (CEST) Received: (qmail 75584 invoked by uid 500); 27 Apr 2017 20:21:06 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 75556 invoked by uid 99); 27 Apr 2017 20:21:05 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Apr 2017 20:21:05 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 636BEDFDAC; Thu, 27 Apr 2017 20:21:05 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: larsh@apache.org To: commits@hbase.apache.org Message-Id: <8a43b52da5a242a39cf5e935fc322d5d@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hbase git commit: HBASE-17877 Improve HBase's byte[] comparator. Date: Thu, 27 Apr 2017 20:21:05 +0000 (UTC) archived-at: Thu, 27 Apr 2017 20:21:08 -0000 Repository: hbase Updated Branches: refs/heads/master 880db3eee -> b81e00f5e HBASE-17877 Improve HBase's byte[] comparator. Signed-off-by: Lars Hofhansl Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b81e00f5 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b81e00f5 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b81e00f5 Branch: refs/heads/master Commit: b81e00f5eabe8d99fd77d74f60e3754add8205da Parents: 880db3e Author: Vikas Vishwakarma Authored: Thu Apr 27 13:21:07 2017 -0700 Committer: Lars Hofhansl Committed: Thu Apr 27 13:21:07 2017 -0700 ---------------------------------------------------------------------- NOTICE.txt | 4 +- .../hadoop/hbase/util/ByteBufferUtils.java | 55 +++++++++----------- .../org/apache/hadoop/hbase/util/Bytes.java | 55 +++++++++----------- 3 files changed, 54 insertions(+), 60 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/NOTICE.txt ---------------------------------------------------------------------- diff --git a/NOTICE.txt b/NOTICE.txt index fb16a28..71efa52 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -38,8 +38,10 @@ Copyright Jan Kovařík Licensed under the Apache License v2.0 as a part of the Bootstrap project. -- -This product includes portions of the Guava project v14, specifically +This product includes portions of the Guava project v14 and v21, specifically 'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java' +'hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java' +'hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java' Copyright (C) 2007 The Guava Authors http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index 34a4e02..6dac300 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -701,46 +701,43 @@ public final class ByteBufferUtils { } static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, int l2) { + final int stride = 8; final int minLength = Math.min(l1, l2); - final int minWords = minLength / Bytes.SIZEOF_LONG; + int strideLimit = minLength & ~(stride - 1); + int i; /* * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than * comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on * 64-bit. */ - int j = minWords << 3; // Same as minWords * SIZEOF_LONG - for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) { - long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + i); - long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + i); - long diff = lw ^ rw; - if (diff != 0) { - return lessThanUnsignedLong(lw, rw) ? -1 : 1; + for (i = 0; i < strideLimit; i += stride) { + long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + (long) i); + long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + (long) i); + if (lw != rw) { + if (!UnsafeAccess.littleEndian) { + return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; + } + + /* + * We want to compare only the first index where left[index] != right[index]. This + * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are + * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant + * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get + * that least significant nonzero byte. This comparison logic is based on UnsignedBytes + * from guava v21 + */ + int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; + return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF)); } } - int offset = j; - if (minLength - offset >= Bytes.SIZEOF_INT) { - int il = UnsafeAccess.theUnsafe.getInt(obj1, o1 + offset); - int ir = UnsafeAccess.theUnsafe.getInt(obj2, o2 + offset); + // The epilogue to cover the last (minLength % stride) elements. + for (; i < minLength; i++) { + int il = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + i) & 0xFF); + int ir = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + i) & 0xFF); if (il != ir) { - return lessThanUnsignedInt(il, ir) ? -1 : 1; - } - offset += Bytes.SIZEOF_INT; - } - if (minLength - offset >= Bytes.SIZEOF_SHORT) { - short sl = UnsafeAccess.theUnsafe.getShort(obj1, o1 + offset); - short sr = UnsafeAccess.theUnsafe.getShort(obj2, o2 + offset); - if (sl != sr) { - return lessThanUnsignedShort(sl, sr) ? -1 : 1; - } - offset += Bytes.SIZEOF_SHORT; - } - if (minLength - offset == 1) { - int a = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + offset) & 0xff); - int b = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + offset) & 0xff); - if (a != b) { - return a - b; + return il - ir; } } return l1 - l2; http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 704d97f..a481d27 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1575,47 +1575,42 @@ public class Bytes implements Comparable { length1 == length2) { return 0; } + final int stride = 8; final int minLength = Math.min(length1, length2); - final int minWords = minLength / SIZEOF_LONG; + int strideLimit = minLength & ~(stride - 1); final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + int i; /* - * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a - * time is no slower than comparing 4 bytes at a time even on 32-bit. - * On the other hand, it is substantially faster on 64-bit. + * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower + * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit. */ - // This is the end offset of long parts. - int j = minWords << 3; // Same as minWords * SIZEOF_LONG - for (int i = 0; i < j; i += SIZEOF_LONG) { + for (i = 0; i < strideLimit; i += stride) { long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); - long diff = lw ^ rw; - if (diff != 0) { - return lessThanUnsignedLong(lw, rw) ? -1 : 1; + if (lw != rw) { + if(!UnsafeAccess.littleEndian) { + return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; + } + + /* + * We want to compare only the first index where left[index] != right[index]. This + * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are + * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant + * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get + * that least significant nonzero byte. This comparison logic is based on UnsignedBytes + * comparator from guava v21 + */ + int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; + return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF)); } } - int offset = j; - if (minLength - offset >= SIZEOF_INT) { - int il = theUnsafe.getInt(buffer1, offset1Adj + offset); - int ir = theUnsafe.getInt(buffer2, offset2Adj + offset); - if (il != ir) { - return lessThanUnsignedInt(il, ir) ? -1: 1; - } - offset += SIZEOF_INT; - } - if (minLength - offset >= SIZEOF_SHORT) { - short sl = theUnsafe.getShort(buffer1, offset1Adj + offset); - short sr = theUnsafe.getShort(buffer2, offset2Adj + offset); - if (sl != sr) { - return lessThanUnsignedShort(sl, sr) ? -1: 1; - } - offset += SIZEOF_SHORT; - } - if (minLength - offset == 1) { - int a = (buffer1[(int)(offset1 + offset)] & 0xff); - int b = (buffer2[(int)(offset2 + offset)] & 0xff); + // The epilogue to cover the last (minLength % stride) elements. + for (; i < minLength; i++) { + int a = (buffer1[offset1 + i] & 0xFF); + int b = (buffer2[offset2 + i] & 0xFF); if (a != b) { return a - b; }