Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 3F5F0200B4F for ; Tue, 12 Jul 2016 01:34:37 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 3DE58160A85; Mon, 11 Jul 2016 23:34:37 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 5EB51160A78 for ; Tue, 12 Jul 2016 01:34:36 +0200 (CEST) Received: (qmail 96967 invoked by uid 500); 11 Jul 2016 23:34:35 -0000 Mailing-List: contact commits-help@quickstep.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@quickstep.incubator.apache.org Delivered-To: mailing list commits@quickstep.incubator.apache.org Received: (qmail 96958 invoked by uid 99); 11 Jul 2016 23:34:35 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd4-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 11 Jul 2016 23:34:35 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd4-us-west.apache.org (ASF Mail Server at spamd4-us-west.apache.org) with ESMTP id 2D9BFC0283 for ; Mon, 11 Jul 2016 23:34:35 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd4-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.646 X-Spam-Level: X-Spam-Status: No, score=-4.646 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-1.426] autolearn=disabled Received: from mx2-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd4-us-west.apache.org [10.40.0.11]) (amavisd-new, port 10024) with ESMTP id i7QHXtQlCkLJ for ; Mon, 11 Jul 2016 23:34:33 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx2-lw-eu.apache.org (ASF Mail Server at mx2-lw-eu.apache.org) with SMTP id B84C05F479 for ; Mon, 11 Jul 2016 23:34:32 +0000 (UTC) Received: (qmail 96892 invoked by uid 99); 11 Jul 2016 23:34:31 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 11 Jul 2016 23:34:31 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id BAC4FE032D; Mon, 11 Jul 2016 23:34:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: navsan@apache.org To: commits@quickstep.incubator.apache.org Date: Mon, 11 Jul 2016 23:34:32 -0000 Message-Id: <3d1136c057b248988d155b2581acea01@git.apache.org> In-Reply-To: <6e518fa02c3e416fb4a7ad1cf15dc7cc@git.apache.org> References: <6e518fa02c3e416fb4a7ad1cf15dc7cc@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [2/3] incubator-quickstep git commit: Use multiplicative hash instead of hash_ap archived-at: Mon, 11 Jul 2016 23:34:37 -0000 Use multiplicative hash instead of hash_ap Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/499b6613 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/499b6613 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/499b6613 Branch: refs/heads/expt_bloom_filter_hash_fn Commit: 499b6613db75fdac1e96a622bc365549722eee47 Parents: 3e52dbd Author: Navneet Potti Authored: Mon Jul 11 12:45:26 2016 -0500 Committer: Navneet Potti Committed: Mon Jul 11 12:45:26 2016 -0500 ---------------------------------------------------------------------- utility/BloomFilter.hpp | 104 ++++++++----------------------------------- 1 file changed, 18 insertions(+), 86 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/499b6613/utility/BloomFilter.hpp ---------------------------------------------------------------------- diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp index b93df84..4125292 100644 --- a/utility/BloomFilter.hpp +++ b/utility/BloomFilter.hpp @@ -71,7 +71,6 @@ class BloomFilter { bit_array_(new std::uint8_t[array_size_in_bytes_]), is_bit_array_owner_(true) { reset(); - generate_unique_hash_fn(); } /** @@ -100,7 +99,6 @@ class BloomFilter { if (!is_initialized) { reset(); } - generate_unique_hash_fn(); } /** @@ -119,7 +117,6 @@ class BloomFilter { bit_array_(new std::uint8_t[array_size_in_bytes_]), is_bit_array_owner_(true) { reset(); - generate_unique_hash_fn(); } /** @@ -198,7 +195,7 @@ class BloomFilter { // Determine all the bit positions that are required to be set. for (std::size_t i = 0; i < hash_fn_count_; ++i) { - compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit); + compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit); modified_bit_positions.push_back(std::make_pair(bit_index, bit)); } @@ -243,7 +240,7 @@ class BloomFilter { std::size_t bit = 0; for (std::size_t i = 0; i < hash_fn_count_; ++i) { - compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit); + compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit); (bit_array_.get())[bit_index / kNumBitsPerByte] |= (1 << bit); } @@ -265,7 +262,7 @@ class BloomFilter { std::size_t bit_index = 0; std::size_t bit = 0; for (std::size_t i = 0; i < hash_fn_count_; ++i) { - compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit); + compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit); if (((bit_array_.get())[bit_index / kNumBitsPerByte] & (1 << bit)) != (1 << bit)) { return false; } @@ -301,88 +298,23 @@ class BloomFilter { *bit = *bit_index % kNumBitsPerByte; } - void generate_unique_hash_fn() { - hash_fn_.reserve(hash_fn_count_); - const std::uint32_t predef_hash_fn_count = 128; - static const std::uint32_t predef_hash_fn[predef_hash_fn_count] = { - 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC, - 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B, - 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66, - 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA, - 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99, - 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33, - 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5, - 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000, - 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F, - 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63, - 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7, - 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492, - 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A, - 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B, - 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3, - 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432, - 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC, - 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB, - 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331, - 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68, - 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8, - 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A, - 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF, - 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E, - 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39, - 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E, - 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355, - 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E, - 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79, - 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075, - 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC, - 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421 - }; - if (hash_fn_count_ <= predef_hash_fn_count) { - std::copy(predef_hash_fn, predef_hash_fn + hash_fn_count_, hash_fn_.begin()); - for (std::uint32_t i = 0; i < hash_fn_.size(); ++i) { - hash_fn_[i] = hash_fn_[i] * hash_fn_[(i + 3) % hash_fn_count_] + static_cast(random_seed_); + inline std::uint32_t hash_multiplicative( + const std::uint8_t *begin, + std::size_t remaining_length, + const std::size_t multiplier) const { + std::uint32_t hash = 0; + std::size_t bytes_hashed = 0; + if (remaining_length >= 4) { + while (bytes_hashed < remaining_length) { + auto val = *reinterpret_cast(begin + bytes_hashed); + hash = multiplier * hash + val; + bytes_hashed += 4; } - } else { - LOG(FATAL) << "Requested number of hash functions is too large."; - } - } - - inline std::uint32_t hash_ap(const std::uint8_t *begin, std::size_t remaining_length, std::uint32_t hash) const { - const std::uint8_t *itr = begin; - std::uint32_t loop = 0; - while (remaining_length >= 8) { - const std::uint32_t &i1 = *(reinterpret_cast(itr)); itr += sizeof(std::uint32_t); - const std::uint32_t &i2 = *(reinterpret_cast(itr)); itr += sizeof(std::uint32_t); - hash ^= (hash << 7) ^ i1 * (hash >> 3) ^ (~((hash << 11) + (i2 ^ (hash >> 5)))); - remaining_length -= 8; } - if (remaining_length) { - if (remaining_length >= 4) { - const std::uint32_t &i = *(reinterpret_cast(itr)); - if (loop & 0x01) { - hash ^= (hash << 7) ^ i * (hash >> 3); - } else { - hash ^= (~((hash << 11) + (i ^ (hash >> 5)))); - } - ++loop; - remaining_length -= 4; - itr += sizeof(std::uint32_t); - } - if (remaining_length >= 2) { - const std::uint16_t &i = *(reinterpret_cast(itr)); - if (loop & 0x01) { - hash ^= (hash << 7) ^ i * (hash >> 3); - } else { - hash ^= (~((hash << 11) + (i ^ (hash >> 5)))); - } - ++loop; - remaining_length -= 2; - itr += sizeof(std::uint16_t); - } - if (remaining_length) { - hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop; - } + while (bytes_hashed < remaining_length) { + std::uint8_t val = *(begin + bytes_hashed); + hash = multiplier * hash + val; + bytes_hashed++; } return hash; }