spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject [1/3] git commit: Fixes to AppendOnlyMap:
Date Sun, 24 Nov 2013 03:02:29 GMT
Updated Branches:
  refs/heads/master 51aa9d6e9 -> 718cc803f


Fixes to AppendOnlyMap:

- Use Murmur Hash 3 finalization step to scramble the bits of HashCode
  instead of the simpler version in java.util.HashMap; the latter one
  had trouble with ranges of consecutive integers. Murmur Hash 3 is used
  by fastutil.
- Use Object.equals() instead of Scala's == to compare keys, because the
  latter does extra casts for numeric types (see the equals method in
  https://github.com/scala/scala/blob/master/src/library/scala/runtime/BoxesRunTime.java)


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/7535d7fb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/7535d7fb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/7535d7fb

Branch: refs/heads/master
Commit: 7535d7fbcbe3c0c2515a2d17a806fa523917e398
Parents: 51aa9d6
Author: Matei Zaharia <matei@eecs.berkeley.edu>
Authored: Sat Nov 23 17:21:37 2013 -0800
Committer: Matei Zaharia <matei@eecs.berkeley.edu>
Committed: Sat Nov 23 17:21:37 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/util/AppendOnlyMap.scala    | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7535d7fb/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
index f60deaf..8542541 100644
--- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
@@ -56,7 +56,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
V)] wi
     var i = 1
     while (true) {
       val curKey = data(2 * pos)
-      if (k.eq(curKey) || k == curKey) {
+      if (k.eq(curKey) || k.equals(curKey)) {
         return data(2 * pos + 1).asInstanceOf[V]
       } else if (curKey.eq(null)) {
         return null.asInstanceOf[V]
@@ -104,7 +104,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
V)] wi
     var i = 1
     while (true) {
       val curKey = data(2 * pos)
-      if (k.eq(curKey) || k == curKey) {
+      if (k.eq(curKey) || k.equals(curKey)) {
         val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
         data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
         return newValue
@@ -167,12 +167,11 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
V)] wi
   }
 
   /**
-   * Re-hash a value to deal better with hash functions that don't differ
-   * in the lower bits, similar to java.util.HashMap
+   * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
+   * We use the Murmur Hash 3 finalization step that's also used in fastutil.
    */
   private def rehash(h: Int): Int = {
-    val r = h ^ (h >>> 20) ^ (h >>> 12)
-    r ^ (r >>> 7) ^ (r >>> 4)
+    it.unimi.dsi.fastutil.HashCommon.murmurHash3(h)
   }
 
   /**
@@ -190,7 +189,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
V)] wi
         data(2 * pos) = key
         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
         return true
-      } else if (curKey.eq(key) || curKey == key) {
+      } else if (curKey.eq(key) || curKey.equals(key)) {
         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
         return false
       } else {


Mime
View raw message