impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From taras...@apache.org
Subject [16/18] incubator-impala git commit: IMPALA-3817: Ensure filter hash function is the same on all hardware.
Date Thu, 14 Jul 2016 19:05:13 GMT
IMPALA-3817: Ensure filter hash function is the same on all hardware.

If some nodes in a cluster support the hardware CRC instruction, then
those nodes would use that hash function when testing filters. This
change uses Murmur2 always. Murmur2 is slower, but a perf run on a
16-node cluster showed negligible performance differences.

This impacts DataStreamMgr's hash function, but that also doesn't
show a perf difference.

(Also, remove unused hash function from HashUtil that was made dead
code by an earlier commit.)

Change-Id: Ia09b67f9e987af3e2c8ac12c347b95a7e09ce6fa
Reviewed-on: http://gerrit.cloudera.org:8080/3566
Reviewed-by: Jim Apple <jbapple@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/26266fda
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/26266fda
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/26266fda

Branch: refs/heads/master
Commit: 26266fda4ca00ab0034ae67dfc0dfde26f62ad70
Parents: 79641a8
Author: Jim Apple <jbapple@cloudera.com>
Authored: Sun Jul 3 10:22:44 2016 -0700
Committer: Taras Bobrovytsky <tarasbob@apache.org>
Committed: Thu Jul 14 19:04:45 2016 +0000

----------------------------------------------------------------------
 be/src/runtime/raw-value.inline.h | 69 ++++++++--------------------------
 be/src/util/hash-util.h           |  8 ----
 2 files changed, 16 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/26266fda/be/src/runtime/raw-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h
index 2d08294..446ae71 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -108,11 +108,7 @@ inline uint32_t RawValue::GetHashValueNonNull<int8_t>(const int8_t*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_TINYINT);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash1(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 1, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 1, seed);
 }
 
 template<>
@@ -120,11 +116,7 @@ inline uint32_t RawValue::GetHashValueNonNull<int16_t>(const int16_t*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_SMALLINT);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash2(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 2, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 2, seed);
 }
 
 template<>
@@ -132,11 +124,7 @@ inline uint32_t RawValue::GetHashValueNonNull<int32_t>(const int32_t*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_INT);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash4(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 4, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 4, seed);
 }
 
 template<>
@@ -144,11 +132,7 @@ inline uint32_t RawValue::GetHashValueNonNull<int64_t>(const int64_t*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_BIGINT);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash8(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 8, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 8, seed);
 }
 
 template<>
@@ -156,11 +140,7 @@ inline uint32_t RawValue::GetHashValueNonNull<float>(const float*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_FLOAT);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash4(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 4, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 4, seed);
 }
 
 template<>
@@ -168,11 +148,7 @@ inline uint32_t RawValue::GetHashValueNonNull<double>(const double*
v,
     const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_DOUBLE);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash8(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 8, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 8, seed);
 }
 
 template<>
@@ -180,14 +156,15 @@ inline uint32_t RawValue::GetHashValueNonNull<impala::StringValue>(
     const impala::StringValue* v,const ColumnType& type, uint32_t seed) {
   DCHECK(v != NULL);
   if (type.type == TYPE_CHAR) {
-    return HashUtil::Hash(StringValue::CharSlotToPtr(
-        reinterpret_cast<const void*>(v), type),type.len, seed);
+    return HashUtil::MurmurHash2_64(
+        StringValue::CharSlotToPtr(reinterpret_cast<const void*>(v), type), type.len,
+        seed);
   } else {
     DCHECK(type.type == TYPE_STRING || type.type == TYPE_VARCHAR);
     if (v->len == 0) {
       return HashUtil::HashCombine32(HASH_VAL_EMPTY, seed);
     }
-    return HashUtil::Hash(v->ptr, v->len, seed);
+    return HashUtil::MurmurHash2_64(v->ptr, v->len, seed);
   }
 }
 
@@ -196,11 +173,7 @@ inline uint32_t RawValue::GetHashValueNonNull<TimestampValue>(
     const TimestampValue* v, const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_TIMESTAMP);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash12(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 12, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 12, seed);
 }
 
 template<>
@@ -208,11 +181,7 @@ inline uint32_t RawValue::GetHashValueNonNull<Decimal4Value>(
     const Decimal4Value* v, const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_DECIMAL);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash4(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 4, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 4, seed);
 }
 
 template<>
@@ -220,11 +189,7 @@ inline uint32_t RawValue::GetHashValueNonNull<Decimal8Value>(
     const Decimal8Value* v, const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_DECIMAL);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash8(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 8, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 8, seed);
 }
 
 template<>
@@ -232,11 +197,7 @@ inline uint32_t RawValue::GetHashValueNonNull<Decimal16Value>(
     const Decimal16Value* v, const ColumnType& type, uint32_t seed) {
   DCHECK_EQ(type.type, TYPE_DECIMAL);
   DCHECK(v != NULL);
-  if (LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
-    return HashUtil::CrcHash16(v, seed);
-  } else {
-    return HashUtil::MurmurHash2_64(v, 16, seed);
-  }
+  return HashUtil::MurmurHash2_64(v, 16, seed);
 }
 
 template<typename T>
@@ -249,6 +210,8 @@ inline uint32_t RawValue::GetHashValue(const T* v, const ColumnType&
type,
 
 inline uint32_t RawValue::GetHashValue(const void* v, const ColumnType& type,
     uint32_t seed) {
+  //The choice of hash function needs to be consistent across all hosts of the cluster.
+
   // Use HashCombine with arbitrary constant to ensure we don't return seed.
   if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/26266fda/be/src/util/hash-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index 1b49855..d8d7b0c 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -228,14 +228,6 @@ class HashUtil {
     // Rehash32to32(hash2) is minimal.
     return (static_cast<uint64_t>(hash) * m + a) >> 32;
   }
-
-  static inline uint64_t Rehash32to64(const uint32_t hash) {
-    static const uint64_t m1 = 0x47b6137a44974d91ull, m2 = 0x8824ad5ba2b7289cull,
-                          a1 = 0x705495c62df1424aull, a2 = 0x9efc49475c6bfb31ull;
-    const uint64_t hash1 = (static_cast<uint64_t>(hash) * m1 + a1) >> 32;
-    const uint64_t hash2 = (static_cast<uint64_t>(hash) * m2 + a2) >> 32;
-    return hash1 | (hash2 << 32);
-  }
 };
 
 }


Mime
View raw message