Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 930AD1804C for ; Mon, 17 Aug 2015 22:00:45 +0000 (UTC) Received: (qmail 82874 invoked by uid 500); 17 Aug 2015 22:00:32 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 82737 invoked by uid 500); 17 Aug 2015 22:00:32 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 82318 invoked by uid 99); 17 Aug 2015 22:00:31 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 17 Aug 2015 22:00:31 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id CC9DAE01CA; Mon, 17 Aug 2015 22:00:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Date: Mon, 17 Aug 2015 22:00:36 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [06/50] [abbrv] hive git commit: HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed by Matt McCline) HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed by Matt McCline) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bddbd1da Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bddbd1da Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bddbd1da Branch: refs/heads/hbase-metastore Commit: bddbd1da0c570a4c03f80b695b940a181787c5ca Parents: cfda570 Author: Gopal V Authored: Mon Aug 10 13:55:29 2015 -0700 Committer: Gopal V Committed: Mon Aug 10 13:56:00 2015 -0700 ---------------------------------------------------------------------- .../ql/exec/vector/expressions/StringExpr.java | 49 +++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/bddbd1da/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java index ebeb642..90817a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java @@ -51,22 +51,49 @@ public class StringExpr { * Use lexicographic unsigned byte value order. * This is what's used for UTF-8 sort order. */ - public static boolean equal(byte[] arg1, int start1, int len1, byte[] arg2, int start2, int len2) { + public static boolean equal(byte[] arg1, final int start1, final int len1, + byte[] arg2, final int start2, final int len2) { if (len1 != len2) { return false; } - for (int index1 = start1, - index2 = start2; - len1 > 0; - len1--, - index1++, - index2++) { - // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer. - if ((arg1[index1] & 0xff) != (arg2[index2] & 0xff)) { - return false; - } + if (len1 == 0) { + return true; + } + + // do bounds check for OOB exception + if (arg1[start1] != arg2[start2] + || arg1[start1 + len1 - 1] != arg2[start2 + len2 - 1]) { + return false; + } + if (len1 == len2) { + // prove invariant to the compiler: len1 = len2 + // all array access between (start1, start1+len1) + // and (start2, start2+len2) are valid + // no more OOB exceptions are possible + final int step = 8; + final int remainder = len1 % step; + final int wlen = len1 - remainder; + // suffix first + for (int i = wlen; i < len1; i++) { + if (arg1[start1 + i] != arg2[start2 + i]) { + return false; + } + } + // SIMD loop + for (int i = 0; i < wlen; i += step) { + final int s1 = start1 + i; + final int s2 = start2 + i; + boolean neq = false; + for (int j = 0; j < step; j++) { + neq = (arg1[s1 + j] != arg2[s2 + j]) || neq; + } + if (neq) { + return false; + } + } } + return true; }