hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gop...@apache.org
Subject hive git commit: HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed by Matt McCline)
Date Mon, 10 Aug 2015 21:00:48 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 ca0f2b4d1 -> c8f19763c


HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed
by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c8f19763
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c8f19763
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c8f19763

Branch: refs/heads/branch-1
Commit: c8f19763c3a56ea67d83aaa4c4339355c7d1ca3b
Parents: ca0f2b4
Author: Gopal V <gopalv@apache.org>
Authored: Mon Aug 10 13:55:29 2015 -0700
Committer: Gopal V <gopalv@apache.org>
Committed: Mon Aug 10 13:57:10 2015 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/expressions/StringExpr.java  | 49 +++++++++++++++-----
 1 file changed, 38 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c8f19763/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
index ebeb642..90817a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -51,22 +51,49 @@ public class StringExpr {
    * Use lexicographic unsigned byte value order.
    * This is what's used for UTF-8 sort order.
    */
-  public static boolean equal(byte[] arg1, int start1, int len1, byte[] arg2, int start2,
int len2) {
+  public static boolean equal(byte[] arg1, final int start1, final int len1,
+      byte[] arg2, final int start2, final int len2) {
     if (len1 != len2) {
       return false;
     }
-    for (int index1 = start1,
-             index2 = start2;
-         len1 > 0;
-         len1--,
-         index1++,
-         index2++) {
-      // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer.
-      if ((arg1[index1] & 0xff) != (arg2[index2] & 0xff)) {
-        return false;
-      }
+    if (len1 == 0) {
+      return true;
+    }
+
+    // do bounds check for OOB exception
+    if (arg1[start1] != arg2[start2]
+        || arg1[start1 + len1 - 1] != arg2[start2 + len2 - 1]) {
+      return false;
+    }
 
+    if (len1 == len2) {
+      // prove invariant to the compiler: len1 = len2
+      // all array access between (start1, start1+len1) 
+      // and (start2, start2+len2) are valid
+      // no more OOB exceptions are possible
+      final int step = 8;
+      final int remainder = len1 % step;
+      final int wlen = len1 - remainder;
+      // suffix first
+      for (int i = wlen; i < len1; i++) {
+        if (arg1[start1 + i] != arg2[start2 + i]) {
+          return false;
+        }
+      }
+      // SIMD loop
+      for (int i = 0; i < wlen; i += step) {
+        final int s1 = start1 + i;
+        final int s2 = start2 + i;
+        boolean neq = false;
+        for (int j = 0; j < step; j++) {
+          neq = (arg1[s1 + j] != arg2[s2 + j]) || neq;
+        }
+        if (neq) {
+          return false;
+        }
+      }
     }
+
     return true;
   }
 


Mime
View raw message