quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zu...@apache.org
Subject [01/50] [abbrv] incubator-quickstep git commit: Improve locking mechanism for bloom filters which were leading to higher lock contention between worker threads during bloom filter insertion
Date Fri, 27 May 2016 03:22:52 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/Test-Storage-Manager [created] c8f63d2e0
  refs/heads/bloom-optimized-joins [created] c6f11d860
  refs/heads/bloom-optimized-joins-experimental [created] f133fd502
  refs/heads/bloom-optimized-selection [created] 9e1fdaab6
  refs/heads/bloom-optimized-selection-profiling [created] 9e1fdaab6
  refs/heads/change-aggregation-hashtable [created] 70fcdb5d6
  refs/heads/deadlock-fix-approach-1 [created] 08904c32f
  refs/heads/distributed-prototype [created] 9285aefc7
  refs/heads/explicit_byproduct_for_gperftools [created] 931136a73
  refs/heads/improve-text-scan-operator [created] 85d7a3363
  refs/heads/increase_default_blocksize_to_32MB [created] 2c1a2722f
  refs/heads/insert-dest-per-thread [created] a203643a2
  refs/heads/operator-selection-heuristics [created] 7805b33f3
  refs/heads/partition_aware_hashjoin [created] c664d39b4
  refs/heads/partition_aware_hashjoin_with_optimizer_support [created] 791a2cacd
  refs/heads/partition_aware_selection [created] 0c0ad24ee
  refs/heads/preloader-cleanup [created] f189257b2
  refs/heads/query-id-operator-workorder [created] d2841af73
  refs/heads/query-manager-used-in-foreman 90e3c105a -> 24c93ca5a
  refs/heads/quickstep_date_support [created] e8e92cffc
  refs/heads/quickstep_gen_stats [created] eb76b4021
  refs/heads/quickstep_partition_parser_support [created] fb919fef9
  refs/heads/remove-bitweaving-from-gitmodules-file [created] 1b1ff7dfc
  refs/heads/revert-85-StorageManager_concurrency_bug [created] 0f762c3a1
  refs/heads/scalar-subquery-expression [created] 793da561e
  refs/heads/stop_preloading_when_bp_is_full [created] 1edc21708
  refs/heads/substring_function [created] d3e64c928
  refs/heads/tmp-relation-col-store [created] e62dba059
  refs/heads/travis-opt [created] 8b9053cab
  refs/heads/use-ccache [created] f6a770629


Improve locking mechanism for bloom filters which were leading to higher lock contention between
worker threads during bloom filter insertion


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/c6f11d86
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/c6f11d86
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/c6f11d86

Branch: refs/heads/bloom-optimized-joins
Commit: c6f11d860cbeb8c4ab39bccc3db3ebde5053833f
Parents: f5f24e7
Author: Saket Saurabh <ssaurabh@cs.wisc.edu>
Authored: Fri Apr 29 00:27:18 2016 -0500
Committer: Saket Saurabh <ssaurabh@cs.wisc.edu>
Committed: Fri Apr 29 00:27:18 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt          |   1 +
 query_optimizer/ExecutionGenerator.cpp  |  24 +++---
 query_optimizer/ExecutionHeuristics.cpp |  27 ++++++-
 query_optimizer/ExecutionHeuristics.hpp |  44 ++++++++---
 storage/HashTable.hpp                   |  18 ++++-
 utility/BloomFilter.hpp                 | 108 ++++++++++++++++++++++++++-
 6 files changed, 189 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 7669490..3644400 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -143,6 +143,7 @@ if (ENABLE_DISTRIBUTED)
 endif()
 target_link_libraries(quickstep_queryoptimizer_ExecutionHeuristics
                       glog
+                      quickstep_catalog_CatalogRelation
                       quickstep_catalog_CatalogTypedefs
                       quickstep_queryexecution_QueryContext_proto
                       quickstep_queryoptimizer_QueryPlan

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 5d67272..5b5dea9 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -575,8 +575,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
   std::vector<attribute_id> probe_original_attribute_ids;
   std::vector<attribute_id> build_original_attribute_ids;
 
-  relation_id probe_relation_id;
-  relation_id build_relation_id;
+  const CatalogRelation *referenced_stored_probe_relation;
+  const CatalogRelation *referenced_stored_build_relation;
 
   bool any_probe_attributes_nullable = false;
   bool any_build_attributes_nullable = false;
@@ -587,15 +587,14 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
       physical_plan->left_join_attributes();
   for (const E::AttributeReferencePtr &left_join_attribute : left_join_attributes) {
     // Try to determine the original stored relation referenced in the Hash Join.
-    const CatalogRelation *referenced_stored_catalog_relation =
+    referenced_stored_probe_relation =
         optimizer_context_->catalog_database()->getRelationByName(left_join_attribute->relation_name());
-    if (referenced_stored_catalog_relation == nullptr) {
+    if (referenced_stored_probe_relation == nullptr) {
       // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
       skip_hash_join_optimization = true;
     } else {
-      probe_relation_id = referenced_stored_catalog_relation->getID();
       const attribute_id probe_operator_attribute_id =
-      referenced_stored_catalog_relation->getAttributeByName(left_join_attribute->attribute_name())->getID();
+          referenced_stored_probe_relation->getAttributeByName(left_join_attribute->attribute_name())->getID();
       probe_original_attribute_ids.emplace_back(probe_operator_attribute_id);
     }
 
@@ -612,15 +611,14 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
       physical_plan->right_join_attributes();
   for (const E::AttributeReferencePtr &right_join_attribute : right_join_attributes)
{
     // Try to determine the original stored relation referenced in the Hash Join.
-    const CatalogRelation *referenced_stored_catalog_relation =
+    referenced_stored_build_relation =
         optimizer_context_->catalog_database()->getRelationByName(right_join_attribute->relation_name());
-    if (referenced_stored_catalog_relation == nullptr) {
+    if (referenced_stored_build_relation == nullptr) {
       // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
       skip_hash_join_optimization = true;
     } else {
-      build_relation_id = referenced_stored_catalog_relation->getID();
       const attribute_id build_operator_attribute_id =
-      referenced_stored_catalog_relation->getAttributeByName(right_join_attribute->attribute_name())->getID();
+          referenced_stored_build_relation->getAttributeByName(right_join_attribute->attribute_name())->getID();
       build_original_attribute_ids.emplace_back(build_operator_attribute_id);
     }
 
@@ -660,7 +658,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
       std::swap(probe_attribute_ids, build_attribute_ids);
       std::swap(any_probe_attributes_nullable, any_build_attributes_nullable);
       std::swap(probe_original_attribute_ids, build_original_attribute_ids);
-      std::swap(probe_relation_id, build_relation_id);
+      std::swap(referenced_stored_probe_relation, referenced_stored_build_relation);
     }
   }
 
@@ -820,8 +818,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
   if (FLAGS_optimize_joins && !skip_hash_join_optimization) {
     execution_heuristics_->addHashJoinInfo(build_operator_index,
                                            join_operator_index,
-                                           build_relation_id,
-                                           probe_relation_id,
+                                           referenced_stored_build_relation,
+                                           referenced_stored_probe_relation,
                                            &build_original_attribute_ids,
                                            &probe_original_attribute_ids,
                                            join_hash_table_index);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index dfbdad1..b181607 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -52,8 +52,8 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
     std::vector<std::size_t> chained_nodes;
     chained_nodes.push_back(origin_node);
     for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
-      const relation_id checked_relation_id = hash_joins_[origin_node].probe_relation_id_;
-      const relation_id expected_relation_id = hash_joins_[i].probe_relation_id_;
+      const relation_id checked_relation_id = hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
+      const relation_id expected_relation_id = hash_joins_[i].referenced_stored_probe_relation_->getID();
       if (checked_relation_id == expected_relation_id) {
         chained_nodes.push_back(i);
       } else {
@@ -67,7 +67,10 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
       for (std::size_t &i : chained_nodes) {
         // Provision for a new bloom filter to be used by the build operator.
         const QueryContext::bloom_filter_id bloom_filter_id =  query_context_proto->bloom_filters_size();
-        query_context_proto->add_bloom_filters();
+        serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
+
+        // Modify the bloom filter properties based on the statistics of the relation.
+        setBloomFilterProperties(bloom_filter_proto, hash_joins_[i].referenced_stored_build_relation_);
 
         // Add build-side bloom filter information to the corresponding hash table proto.
         query_context_proto->mutable_join_hash_tables(hash_joins_[i].join_hash_table_id_)
@@ -101,5 +104,23 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
   }
 }
 
+void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
+                                                   const CatalogRelation *relation) {
+  const std::size_t cardinality = relation->estimateTupleCardinality();
+  if (cardinality < kOneThousand) {
+    bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
+    bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
+  } else if (cardinality >= kOneThousand && cardinality < kTenThousand) {
+    bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
+    bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
+  } else if (cardinality >= kTenThousand && cardinality < kHundredThousand)
{
+    bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
+    bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
+  } else if (cardinality >= kHundredThousand) {
+    bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
+    bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
+  }
+}
+
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
index 8ca1164..9406091 100644
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ b/query_optimizer/ExecutionHeuristics.hpp
@@ -20,6 +20,7 @@
 
 #include <vector>
 
+#include "catalog/CatalogRelation.hpp"
 #include "catalog/CatalogTypedefs.hpp"
 #include "query_execution/QueryContext.pb.h"
 #include "query_optimizer/QueryPlan.hpp"
@@ -45,15 +46,15 @@ class HashJoinInfo {
  public:
   HashJoinInfo(QueryPlan::DAGNodeIndex build_operator_index,
                QueryPlan::DAGNodeIndex join_operator_index,
-               const relation_id &build_relation_id,
-               const relation_id &probe_relation_id,
+               const CatalogRelation *referenced_stored_build_relation,
+               const CatalogRelation *referenced_stored_probe_relation,
                std::vector<attribute_id> &&build_attributes,
                std::vector<attribute_id> &&probe_attributes,
                const QueryContext::join_hash_table_id &join_hash_table_id)
     : build_operator_index_(build_operator_index),
       join_operator_index_(join_operator_index),
-      build_relation_id_(build_relation_id),
-      probe_relation_id_(probe_relation_id),
+      referenced_stored_build_relation_(referenced_stored_build_relation),
+      referenced_stored_probe_relation_(referenced_stored_probe_relation),
       build_attributes_(build_attributes),
       probe_attributes_(probe_attributes),
       join_hash_table_id_(join_hash_table_id) {
@@ -61,8 +62,8 @@ class HashJoinInfo {
 
   const QueryPlan::DAGNodeIndex build_operator_index_;
   const QueryPlan::DAGNodeIndex join_operator_index_;
-  const relation_id build_relation_id_;
-  const relation_id probe_relation_id_;
+  const CatalogRelation *referenced_stored_build_relation_;
+  const CatalogRelation *referenced_stored_probe_relation_;
   const std::vector<attribute_id> build_attributes_;
   const std::vector<attribute_id> probe_attributes_;
   const QueryContext::join_hash_table_id join_hash_table_id_;
@@ -79,6 +80,19 @@ typedef execution_heuristics_internal::HashJoinInfo HashJoinInfo;
  **/
 class ExecutionHeuristics {
  public:
+  static const std::size_t kOneHundred = 100;
+  static const std::size_t kOneThousand = 1000;
+  static const std::size_t kTenThousand = 10000;
+  static const std::size_t kHundredThousand = 100000;
+  static const std::size_t kMillion = 1000000;
+
+  static const std::size_t kCompressionFactor = 10;
+
+  static const std::size_t kVeryLowSparsityHash = 1;
+  static const std::size_t kLowSparsityHash = 2;
+  static const std::size_t kMediumSparsityHash = 5;
+  static const std::size_t kHighSparsityHash = 10;
+
   /**
    * @brief Constructor.
    **/
@@ -99,15 +113,15 @@ class ExecutionHeuristics {
    **/
   inline void addHashJoinInfo(QueryPlan::DAGNodeIndex build_operator_index,
                               QueryPlan::DAGNodeIndex join_operator_index,
-                              const relation_id build_relation_id,
-                              const relation_id probe_relation_id,
+                              const CatalogRelation *referenced_stored_build_relation,
+                              const CatalogRelation *referenced_stored_probe_relation,
                               std::vector<attribute_id> *build_attributes,
                               std::vector<attribute_id> *probe_attributes,
                               const QueryContext::join_hash_table_id &join_hash_table_id)
{
     hash_joins_.push_back(HashJoinInfo(build_operator_index,
                                        join_operator_index,
-                                       build_relation_id,
-                                       probe_relation_id,
+                                       referenced_stored_build_relation,
+                                       referenced_stored_probe_relation,
                                        std::move(*build_attributes),
                                        std::move(*probe_attributes),
                                        join_hash_table_id));
@@ -123,6 +137,16 @@ class ExecutionHeuristics {
    **/
   void optimizeExecutionPlan(QueryPlan *query_plan, serialization::QueryContext *query_context_proto);
 
+  /**
+   * @brief Set the properties of the bloom filter proto based on the statistics
+   *        of the given relation.
+   *
+   * @param bloom_filter_proto A mutable reference to the bloom filter protobuf representation.
+   * @param relation The catalog relation on which bloom filter is being built..
+   **/
+  void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
+                                const CatalogRelation *relation);
+
  private:
   std::vector<HashJoinInfo> hash_joins_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index a047dba..97f7a77 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -1477,6 +1477,12 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy,
al
                                                         &prealloc_state);
       }
     }
+    std::unique_ptr<BloomFilter> thread_local_bloom_filter;
+    if (has_build_side_bloom_filter_) {
+      thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
+                                                      build_bloom_filter_->getNumberOfHashes(),
+                                                      build_bloom_filter_->getBitArraySize()));
+    }
     if (resizable) {
       while (result == HashTablePutResult::kOutOfSpace) {
         {
@@ -1494,8 +1500,8 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy,
al
                                        using_prealloc ? &prealloc_state : nullptr);
             // Insert into bloom filter, if enabled.
             if (has_build_side_bloom_filter_) {
-              this->build_bloom_filter_->insert(static_cast<const std::uint8_t *>(key.getDataPtr()),
-                                                key.getDataSize());
+              thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t
*>(key.getDataPtr()),
+                                                      key.getDataSize());
             }
             if (result == HashTablePutResult::kDuplicateKey) {
               DEBUG_ASSERT(!using_prealloc);
@@ -1524,14 +1530,18 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy,
al
                                    using_prealloc ? &prealloc_state : nullptr);
         // Insert into bloom filter, if enabled.
         if (has_build_side_bloom_filter_) {
-          this->build_bloom_filter_->insert(static_cast<const std::uint8_t *>(key.getDataPtr()),
-                                            key.getDataSize());
+          thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
+                                                  key.getDataSize());
         }
         if (result != HashTablePutResult::kOK) {
           return result;
         }
       }
     }
+    // Update the build side bloom filter with thread local copy, if available.
+    if (has_build_side_bloom_filter_) {
+      build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
+    }
 
     return HashTablePutResult::kOK;
   });

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c6f11d86/utility/BloomFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp
index 33c2ebf..a173253 100644
--- a/utility/BloomFilter.hpp
+++ b/utility/BloomFilter.hpp
@@ -27,6 +27,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "storage/StorageConstants.hpp"
@@ -133,21 +134,106 @@ class BloomFilter {
   }
 
   /**
-   * @brief Inserts a given value into the bloom filter.
+   * @brief Get the random seed that was used to initialize this bloom filter.
+   *
+   * @return Returns the random seed.
+   **/
+  inline std::uint64_t getRandomSeed() const {
+    return random_seed_;
+  }
+
+  /**
+   * @brief Get the number of hash functions used in this bloom filter.
+   *
+   * @return Returns the number of hash functions.
+   **/
+  inline std::uint32_t getNumberOfHashes() const {
+    return hash_fn_count_;
+  }
+
+  /**
+   * @brief Get the size of the bit array in bytes for this bloom filter.
+   *
+   * @return Returns the bit array size (in bytes).
+   **/
+  inline std::uint64_t getBitArraySize() const {
+    return array_size_in_bytes_;
+  }
+
+  /**
+   * @brief Get the constant pointer to the bit array for this bloom filter
+   *
+   * @return Returns constant pointer to the bit array.
+   **/
+  inline const std::uint8_t* getBitArray() const {
+    return bit_array_.get();
+  }
+
+  /**
+   * @brief Inserts a given value into the bloom filter in a thread-safe manner.
    *
    * @param key_begin A pointer to the value being inserted.
    * @param length Size of the value being inserted in bytes.
    */
-  inline void insert(const std::uint8_t *key_begin, const std::size_t &length) {
+  inline void insert(const std::uint8_t *key_begin, const std::size_t length) {
     // Locks are needed during insertion, when multiple workers may be modifying the
     // bloom filter concurrently. However, locks are not required during membership test.
-    SpinSharedMutexExclusiveLock<false> lock(bloom_filter_insert_mutex_);
     std::size_t bit_index = 0;
     std::size_t bit = 0;
+    std::vector<std::pair<std::size_t, std::size_t>> modified_bit_positions;
+    std::vector<bool> is_bit_position_correct;
+
+    // Determine all the bit positions that are required to be set.
+    for (std::size_t i = 0; i < hash_fn_count_; ++i) {
+      compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+      modified_bit_positions.push_back(std::make_pair(bit_index, bit));
+    }
+
+    // Acquire a reader lock and check which of the bit positions are already set.
+    {
+      SpinSharedMutexSharedLock<false> shared_reader_lock(bloom_filter_insert_mutex_);
+      for (std::size_t i = 0; i < hash_fn_count_; ++i) {
+        bit_index = modified_bit_positions[i].first;
+        bit = modified_bit_positions[i].second;
+        if (((bit_array_.get())[bit_index / kNumBitsPerByte] & (1 << bit)) != (1
<< bit)) {
+          is_bit_position_correct.push_back(false);
+        } else {
+          is_bit_position_correct.push_back(true);
+        }
+      }
+    }
+
+    // Acquire a writer lock and set the bit positions are which are not set.
+    {
+      SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
+      for (std::size_t i = 0; i < hash_fn_count_; ++i) {
+        if (!is_bit_position_correct[i]) {
+          bit_index = modified_bit_positions[i].first;
+          bit = modified_bit_positions[i].second;
+          (bit_array_.get())[bit_index / kNumBitsPerByte] |= (1 << bit);
+        }
+      }
+    }
+    ++inserted_element_count_;
+  }
+
+  /**
+   * @brief Inserts a given value into the bloom filter.
+   * @Warning This is a faster thread-unsafe version of the insert() function.
+   *          The caller needs to ensure the thread safety.
+   *
+   * @param key_begin A pointer to the value being inserted.
+   * @param length Size of the value being inserted in bytes.
+   */
+  inline void insertUnSafe(const std::uint8_t *key_begin, const std::size_t length) {
+    std::size_t bit_index = 0;
+    std::size_t bit = 0;
+
     for (std::size_t i = 0; i < hash_fn_count_; ++i) {
       compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
       (bit_array_.get())[bit_index / kNumBitsPerByte] |= (1 << bit);
     }
+
     ++inserted_element_count_;
   }
 
@@ -156,6 +242,9 @@ class BloomFilter {
    *        If true is returned, then a value may or may not be present in the bloom filter.
    *        If false is returned, a value is certainly not present in the bloom filter.
    *
+   * @note The membersip test does not require any locks, because the assumption is that
+   *       the bloom filter will only be used after it has been built.
+   *
    * @param key_begin A pointer to the value being tested for membership.
    * @param length Size of the value being inserted in bytes.
    */
@@ -172,6 +261,19 @@ class BloomFilter {
   }
 
   /**
+   * @brief Perform a bitwise-OR of the given Bloom filter with this bloom filter.
+   *        Essentially, it does a union of this bloom filter with the passed bloom filter.
+   *
+   * @param bloom_filter A const pointer to the bloom filter object to do bitwise-OR with.
+   */
+  inline void bitwiseOr(const BloomFilter *bloom_filter) {
+    SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
+    for (std::size_t byte_index = 0; byte_index < bloom_filter->getBitArraySize();
++byte_index) {
+      (bit_array_.get())[byte_index] |= bloom_filter->getBitArray()[byte_index];
+    }
+  }
+
+  /**
    * @brief Return the number of elements currently inserted into bloom filter.
    *
    * @return The number of elements inserted into bloom filter.


Mime
View raw message