hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject [08/43] hive git commit: HIVE-12354 : MapJoin with double keys is slow on MR (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Date Tue, 17 Nov 2015 20:19:19 GMT
HIVE-12354 : MapJoin with double keys is slow on MR (Sergey Shelukhin, reviewed by Prasanth
Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/526c507b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/526c507b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/526c507b

Branch: refs/heads/master-fixed
Commit: 526c507b03da70c83691f8c3729b080c5c0e1380
Parents: 954baa8
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Mon Nov 9 16:32:31 2015 -0800
Committer: Owen O'Malley <omalley@apache.org>
Committed: Tue Nov 17 12:18:30 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/KeyWrapperFactory.java  | 20 +--------------
 .../ql/exec/persistence/MapJoinKeyObject.java   |  6 ++---
 .../objectinspector/ObjectInspectorUtils.java   | 26 ++++++++++++++++++++
 3 files changed, 29 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/526c507b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 1c409a2..5154a5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -105,25 +105,7 @@ public class KeyWrapperFactory {
 
     @Override
     public void setHashKey() {
-      if (keys == null) {
-        hashcode = 0;
-      } else {
-        hashcode = 1;
-        for (Object element : keys) {
-          hashcode = 31 * hashcode;
-          if(element != null) {
-            if(element instanceof LazyDouble) {
-              long v = Double.doubleToLongBits(((LazyDouble)element).getWritableObject().get());
-              hashcode = hashcode + (int) (v ^ (v >>> 32));
-            } else if (element instanceof DoubleWritable){
-              long v = Double.doubleToLongBits(((DoubleWritable)element).get());
-              hashcode = hashcode + (int) (v ^ (v >>> 32));
-            } else {
-              hashcode = hashcode + element.hashCode();
-            }
-          }
-        }
-      }
+      hashcode = ObjectInspectorUtils.writableArrayHashCode(keys);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/526c507b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
index e1fd6d3..7592f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
@@ -78,11 +78,9 @@ public class MapJoinKeyObject extends MapJoinKey {
 
   @Override
   public int hashCode() {
-    final int prime = 31;
-    int result = 1;
-    result = prime * result + Arrays.hashCode(key);
-    return result;
+    return ObjectInspectorUtils.writableArrayHashCode(key);
   }
+
   @Override
   public boolean equals(Object obj) {
     if (this == obj)

http://git-wip-us.apache.org/repos/asf/hive/blob/526c507b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 56597a2..7a13eb0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector;
@@ -77,6 +78,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.StringUtils;
 
@@ -104,6 +106,30 @@ public final class ObjectInspectorUtils {
   }
 
   /**
+   * Calculates the hash code for array of Objects that contains writables. This is used
+   * to work around the buggy Hadoop DoubleWritable hashCode implementation. This should
+   * only be used for process-local hash codes; don't replace stored hash codes like bucketing.
+   */
+  public static int writableArrayHashCode(Object[] keys) {
+    if (keys == null) return 0;
+    int hashcode = 1;
+    for (Object element : keys) {
+      hashcode = 31 * hashcode;
+      if (element == null) continue;
+      if (element instanceof LazyDouble) {
+        long v = Double.doubleToLongBits(((LazyDouble)element).getWritableObject().get());
+        hashcode = hashcode + (int) (v ^ (v >>> 32));
+      } else if (element instanceof DoubleWritable){
+        long v = Double.doubleToLongBits(((DoubleWritable)element).get());
+        hashcode = hashcode + (int) (v ^ (v >>> 32));
+      } else {
+        hashcode = hashcode + element.hashCode();
+      }
+    }
+    return hashcode;
+  }
+
+  /**
    * Ensures that an ObjectInspector is Writable.
    */
   public static ObjectInspector getWritableObjectInspector(ObjectInspector oi) {


Mime
View raw message