quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nav...@apache.org
Subject [3/3] incubator-quickstep git commit: Get rid of Bloom filter random seed
Date Mon, 11 Jul 2016 23:34:33 GMT
Get rid of Bloom filter random seed


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/30824632
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/30824632
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/30824632

Branch: refs/heads/expt_bloom_filter_hash_fn
Commit: 30824632df92feb8c4355d2599d152530f391ff2
Parents: 499b661
Author: Navneet Potti <navsan@gmail.com>
Authored: Mon Jul 11 12:56:31 2016 -0500
Committer: Navneet Potti <navsan@gmail.com>
Committed: Mon Jul 11 12:56:31 2016 -0500

----------------------------------------------------------------------
 storage/BloomFilterIndexSubBlock.cpp   |  4 +---
 storage/BloomFilterIndexSubBlock.hpp   |  6 ------
 storage/HashTable.hpp                  |  3 +--
 utility/BloomFilter.hpp                | 27 +++++----------------------
 utility/BloomFilter.proto              |  6 ++----
 utility/tests/BloomFilter_unittest.cpp |  9 +++------
 6 files changed, 12 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/storage/BloomFilterIndexSubBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.cpp b/storage/BloomFilterIndexSubBlock.cpp
index e806217..a40f69f 100644
--- a/storage/BloomFilterIndexSubBlock.cpp
+++ b/storage/BloomFilterIndexSubBlock.cpp
@@ -55,7 +55,6 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock
&t
                     sub_block_memory_size),
       is_initialized_(false),
       is_consistent_(false),
-      random_seed_(kBloomFilterSeed),
       bit_array_size_in_bytes_(description.GetExtension(
                                    BloomFilterIndexSubBlockDescription::bloom_filter_size))
{
   CHECK(DescriptionIsValid(relation_, description_))
@@ -74,8 +73,7 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock
&t
   const std::uint32_t salt_count = description.GetExtension(BloomFilterIndexSubBlockDescription::number_of_hashes);
 
   // Initialize the bloom_filter_ data structure to operate on bit_array.
-  bloom_filter_.reset(new BloomFilter(random_seed_,
-                                      salt_count,
+  bloom_filter_.reset(new BloomFilter(salt_count,
                                       bit_array_size_in_bytes_,
                                       bit_array_.get(),
                                       is_bloom_filter_initialized));

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/storage/BloomFilterIndexSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.hpp b/storage/BloomFilterIndexSubBlock.hpp
index 4925673..8c81156 100644
--- a/storage/BloomFilterIndexSubBlock.hpp
+++ b/storage/BloomFilterIndexSubBlock.hpp
@@ -65,11 +65,6 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
     kSelectivityNone
   };
 
-  /**
-   * @brief A random seed to initialize the bloom filter hash functions.
-   **/
-  static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
-
   BloomFilterIndexSubBlock(const TupleStorageSubBlock &tuple_store,
                            const IndexSubBlockDescription &description,
                            const bool new_block,
@@ -179,7 +174,6 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
  private:
   bool is_initialized_;
   bool is_consistent_;
-  const std::uint64_t random_seed_;
   const std::uint64_t bit_array_size_in_bytes_;
   std::vector<attribute_id> indexed_attribute_ids_;
   std::unique_ptr<unsigned char> bit_array_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 2100430..1e153f2 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -1481,8 +1481,7 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy,
al
     }
     std::unique_ptr<BloomFilter> thread_local_bloom_filter;
     if (has_build_side_bloom_filter_) {
-      thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
-                                                      build_bloom_filter_->getNumberOfHashes(),
+      thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getNumberOfHashes(),
                                                       build_bloom_filter_->getBitArraySize()));
     }
     if (resizable) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/utility/BloomFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp
index 4125292..332cb0f 100644
--- a/utility/BloomFilter.hpp
+++ b/utility/BloomFilter.hpp
@@ -57,15 +57,12 @@ class BloomFilter {
    * @note When no bit_array is being passed to the constructor,
    *       then the bit_array is owned and managed by this class.
    *
-   * @param random_seed A random_seed that generates unique hash functions.
    * @param hash_fn_count The number of hash functions used by this bloom filter.
    * @param bit_array_size_in_bytes Size of the bit array.
    **/
-  BloomFilter(const std::uint64_t random_seed,
-              const std::size_t hash_fn_count,
+  BloomFilter(const std::size_t hash_fn_count,
               const std::uint64_t bit_array_size_in_bytes)
-      : random_seed_(random_seed),
-        hash_fn_count_(hash_fn_count),
+      : hash_fn_count_(hash_fn_count),
         array_size_in_bytes_(bit_array_size_in_bytes),
         array_size_(array_size_in_bytes_ * kNumBitsPerByte),
         bit_array_(new std::uint8_t[array_size_in_bytes_]),
@@ -78,20 +75,17 @@ class BloomFilter {
    * @note When a bit_array is passed as an argument to the constructor,
    *       then the ownership of the bit array lies with the caller.
    *
-   * @param random_seed A random_seed that generates unique hash functions.
    * @param hash_fn_count The number of hash functions used by this bloom filter.
    * @param bit_array_size_in_bytes Size of the bit array.
    * @param bit_array A pointer to the memory region that is used to store bit array.
    * @param is_initialized A boolean that indicates whether to zero-out the region
    *                       before use or not.
    **/
-  BloomFilter(const std::uint64_t random_seed,
-              const std::size_t hash_fn_count,
+  BloomFilter(const std::size_t hash_fn_count,
               const std::uint64_t bit_array_size_in_bytes,
               std::uint8_t *bit_array,
               const bool is_initialized)
-      : random_seed_(random_seed),
-        hash_fn_count_(hash_fn_count),
+      : hash_fn_count_(hash_fn_count),
         array_size_in_bytes_(bit_array_size_in_bytes),
         array_size_(bit_array_size_in_bytes * kNumBitsPerByte),
         bit_array_(bit_array),  // Owned by the calling method.
@@ -110,8 +104,7 @@ class BloomFilter {
    *        bloom filter configuration.
    **/
   explicit BloomFilter(const serialization::BloomFilter &bloom_filter_proto)
-      : random_seed_(bloom_filter_proto.bloom_filter_seed()),
-        hash_fn_count_(bloom_filter_proto.number_of_hashes()),
+      : hash_fn_count_(bloom_filter_proto.number_of_hashes()),
         array_size_in_bytes_(bloom_filter_proto.bloom_filter_size()),
         array_size_(array_size_in_bytes_ * kNumBitsPerByte),
         bit_array_(new std::uint8_t[array_size_in_bytes_]),
@@ -144,15 +137,6 @@ class BloomFilter {
   }
 
   /**
-   * @brief Get the random seed that was used to initialize this bloom filter.
-   *
-   * @return Returns the random seed.
-   **/
-  inline std::uint64_t getRandomSeed() const {
-    return random_seed_;
-  }
-
-  /**
    * @brief Get the number of hash functions used in this bloom filter.
    *
    * @return Returns the number of hash functions.
@@ -320,7 +304,6 @@ class BloomFilter {
   }
 
  private:
-  const std::uint64_t random_seed_;
   std::vector<std::uint32_t> hash_fn_;
   const std::uint32_t hash_fn_count_;
   std::uint64_t array_size_in_bytes_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/utility/BloomFilter.proto
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.proto b/utility/BloomFilter.proto
index 8dd9163..b5d14a9 100644
--- a/utility/BloomFilter.proto
+++ b/utility/BloomFilter.proto
@@ -21,10 +21,8 @@ message BloomFilter {
   // The default values were determined from empirical experiments.
   // These values control the amount of false positivity that
   // is expected from Bloom Filter.
-  // - Default seed for initializing family of hashes = 0xA5A5A5A55A5A5A5A.
   // - Default bloom filter size = 10 KB.
   // - Default number of hash functions used in bloom filter = 5.
-  optional fixed64 bloom_filter_seed = 1 [default = 0xA5A5A5A55A5A5A5A];
-  optional uint32 bloom_filter_size = 2 [default = 10000];
-  optional uint32 number_of_hashes = 3 [default = 5];
+  optional uint32 bloom_filter_size = 1 [default = 10000];
+  optional uint32 number_of_hashes = 2 [default = 5];
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/30824632/utility/tests/BloomFilter_unittest.cpp
----------------------------------------------------------------------
diff --git a/utility/tests/BloomFilter_unittest.cpp b/utility/tests/BloomFilter_unittest.cpp
index 4d8beae..03ac9f1 100644
--- a/utility/tests/BloomFilter_unittest.cpp
+++ b/utility/tests/BloomFilter_unittest.cpp
@@ -28,14 +28,13 @@ namespace quickstep {
 
 class BloomFilterTest : public ::testing::Test {
  public:
-  static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
   static const std::uint32_t kNumberOfHashFunctions = 20;
   static const std::uint32_t kBloomFilterSize = 100000;  // in bytes.
 };
 
 TEST_F(BloomFilterTest, BloomFilterInsertTest) {
   // This test inserts an element into a known bloom filter size
-  // with given number of hash functions and initial seed,
+  // with given number of hash functions,
   // and tests whether the expected bits are set or not.
   std::unique_ptr<std::uint8_t> bit_array;
   std::unique_ptr<BloomFilter> bloom_filter;
@@ -44,8 +43,7 @@ TEST_F(BloomFilterTest, BloomFilterInsertTest) {
   const std::uint32_t bloom_filter_size = 1;
 
   bit_array.reset(new std::uint8_t[bloom_filter_size]);
-  bloom_filter.reset(new BloomFilter(kBloomFilterSeed,
-                                     num_hashes,
+  bloom_filter.reset(new BloomFilter(num_hashes,
                                      bloom_filter_size,
                                      bit_array.get(),
                                      false));
@@ -61,8 +59,7 @@ TEST_F(BloomFilterTest, BloomFilterContainsTest) {
   std::unique_ptr<BloomFilter> bloom_filter;
 
   bit_array.reset(new std::uint8_t[kBloomFilterSize]);
-  bloom_filter.reset(new BloomFilter(kBloomFilterSeed,
-                                     kNumberOfHashFunctions,
+  bloom_filter.reset(new BloomFilter(kNumberOfHashFunctions,
                                      kBloomFilterSize,
                                      bit_array.get(),
                                      false));


Mime
View raw message