kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aw...@apache.org
Subject [kudu] branch master updated: util: helper class for working with bitsets
Date Thu, 28 Mar 2019 00:31:22 GMT
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 60ea656  util: helper class for working with bitsets
60ea656 is described below

commit 60ea656b314a0daec41a0e14fd25ff2c55c3f4b5
Author: Andrew Wong <awong@apache.org>
AuthorDate: Mon Mar 25 19:00:21 2019 -0700

    util: helper class for working with bitsets
    
    Adds a templatized helper class to facilitate working with bitsets. This
    is just a wrapper around std::bitset, but it exposes a more
    container-like interface. This is particularly useful for specifying
    containers of enum types and the like (e.g. rather than using a hashed
    container).
    
    This also adds an iterator class for the new wrapper; such an iterator
    apparently doesn't exist[1] in the STL bitset class.
    
    [1] https://stackoverflow.com/a/34728458
    
    Change-Id: Ie0344fe94e9f9da9651164cb1b456c92d99dbdf4
    Reviewed-on: http://gerrit.cloudera.org:8080/12866
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
---
 src/kudu/util/CMakeLists.txt |   2 +-
 src/kudu/util/bitset-test.cc | 170 ++++++++++++++++++++++++++++++++++++
 src/kudu/util/bitset.h       | 202 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 373 insertions(+), 1 deletion(-)

diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index d08e0ee..312739e 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -149,7 +149,6 @@ set(UTIL_SRCS
   bitmap.cc
   block_cache_metrics.cc
   bloom_filter.cc
-  bitmap.cc
   cache.cc
   coding.cc
   condition_variable.cc
@@ -394,6 +393,7 @@ ADD_KUDU_TEST(async_util-test)
 ADD_KUDU_TEST(atomic-test)
 ADD_KUDU_TEST(bit-util-test)
 ADD_KUDU_TEST(bitmap-test)
+ADD_KUDU_TEST(bitset-test)
 ADD_KUDU_TEST(blocking_queue-test)
 ADD_KUDU_TEST(bloom_filter-test)
 ADD_KUDU_TEST(cache-bench RUN_SERIAL true)
diff --git a/src/kudu/util/bitset-test.cc b/src/kudu/util/bitset-test.cc
new file mode 100644
index 0000000..b12fd16
--- /dev/null
+++ b/src/kudu/util/bitset-test.cc
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/bitset.h"
+
+#include <cstddef>
+#include <set>
+#include <string>
+#include <utility>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/test_macros.h"
+
+using std::set;
+using std::string;
+using strings::Substitute;
+
+namespace {
+
+enum TestEnum {
+  BEGINNING,
+  MIDDLE,
+  END,
+};
+constexpr size_t kMaxEnumVal = TestEnum::END + 1;
+
+typedef set<TestEnum> EnumSet;
+typedef FixedBitSet<TestEnum, kMaxEnumVal> EnumBitSet;
+
+const EnumSet kFullEnumSet({
+  TestEnum::BEGINNING,
+  TestEnum::MIDDLE,
+  TestEnum::END,
+});
+
+bool CompareContainers(EnumSet enum_set, EnumBitSet ebs) {
+  if (enum_set.empty() != ebs.empty()) {
+    LOG(INFO) << Substitute("enum set is $0, bitset is $1",
+                            enum_set.empty() ? "empty" : "not empty",
+                            ebs.empty() ? "empty" : "not empty");
+    return false;
+  }
+  if (enum_set.size() != ebs.size()) {
+    LOG(INFO) << Substitute("enum set has $0 elements, bitset has $1",
+                            enum_set.size(), ebs.size());
+    return false;
+  }
+  for (const auto& e : enum_set) {
+    if (!ContainsKey(ebs, e)) {
+      LOG(INFO) << Substitute("enum set contains $0, not found in bitset", e);
+      return false;
+    }
+  }
+  for (TestEnum e : ebs) {
+    if (!ContainsKey(enum_set, e)) {
+      LOG(INFO) << Substitute("bitset contains $0, not found in enum set", e);
+      return false;
+    }
+  }
+  return true;
+}
+
+} // anonymous namespace
+
+TEST(BitSetTest, TestConstruction) {
+  EnumSet enum_set;
+  for (int i = 0; i < kMaxEnumVal; i++) {
+    InsertOrDie(&enum_set, static_cast<TestEnum>(i));
+    EnumBitSet ebs(enum_set);
+    ASSERT_TRUE(CompareContainers(enum_set, ebs));
+  }
+}
+
+TEST(BitSetTest, TestInitializerList) {
+  EnumBitSet ebs({ TestEnum::BEGINNING });
+  ASSERT_TRUE(ContainsKey(ebs, TestEnum::BEGINNING));
+  ASSERT_EQ(1, ebs.size());
+}
+
+// Test basic operations for a bitset of enums, comparing is to an STL
+// container of enums.
+TEST(BitSetTest, TestBasicOperations) {
+  EnumBitSet bitset;
+  ASSERT_TRUE(bitset.empty());
+
+  EnumSet enum_set;
+  const auto add_to_containers = [&] (TestEnum e) {
+    ASSERT_EQ(InsertIfNotPresent(&enum_set, e),
+              InsertIfNotPresent(&bitset, e));
+  };
+  const auto remove_from_containers = [&] (TestEnum e) {
+    ASSERT_EQ(enum_set.erase(e), bitset.erase(e));
+  };
+  // Insert all elements, checking to make sure our containers' contents remain
+  // the same.
+  for (const auto& e : kFullEnumSet) {
+    NO_FATALS(add_to_containers(e));
+    ASSERT_TRUE(CompareContainers(enum_set, bitset));
+  }
+
+  // Do a sanity check that we can't insert something that already exists in
+  // the set.
+  ASSERT_FALSE(InsertIfNotPresent(&bitset, TestEnum::BEGINNING));
+
+  // Now remove all elements.
+  for (const auto& e : kFullEnumSet) {
+    NO_FATALS(remove_from_containers(e));
+    ASSERT_TRUE(CompareContainers(enum_set, bitset));
+  }
+
+  // Do a final sanity check that the bitset looks how we expect.
+  ASSERT_TRUE(CompareContainers(enum_set, bitset));
+  ASSERT_TRUE(bitset.empty());
+}
+
+// Test the set's insert interface.
+TEST(BitSetTest, TestInsert) {
+  EnumBitSet bitset;
+  {
+    auto iter_and_inserted = bitset.insert(TestEnum::BEGINNING);
+    ASSERT_EQ(TestEnum::BEGINNING, *iter_and_inserted.first);
+    ASSERT_TRUE(iter_and_inserted.second);
+  }
+  {
+    auto iter_and_inserted = bitset.insert(TestEnum::BEGINNING);
+    ASSERT_EQ(TestEnum::BEGINNING, *iter_and_inserted.first);
+    ASSERT_FALSE(iter_and_inserted.second);
+  }
+}
+
+#ifndef NDEBUG
+// Make sure we hit a check failure if we attempt to use the bitset with values
+// outside of its range.
+TEST(BitSetDeathTest, TestInvalidUsage) {
+  const string kDeathMessage = "Check failed";
+  const TestEnum kOutOfRange = TestEnum(kMaxEnumVal);
+  EnumBitSet bitset;
+  EXPECT_DEATH({
+    bitset.insert(kOutOfRange);
+  }, kDeathMessage);
+
+  EXPECT_DEATH({
+    bitset.erase(kOutOfRange);
+  }, kDeathMessage);
+
+  EXPECT_DEATH({
+    bitset.contains(kOutOfRange);
+  }, kDeathMessage);
+
+  ASSERT_TRUE(bitset.empty());
+}
+#endif // NDEBUG
diff --git a/src/kudu/util/bitset.h b/src/kudu/util/bitset.h
new file mode 100644
index 0000000..61a6274
--- /dev/null
+++ b/src/kudu/util/bitset.h
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <bitset>
+#include <cstddef>
+#include <initializer_list>
+#include <iterator>
+#include <utility>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/macros.h"
+
+// Utility template for working with a bitset to make it feel more like a
+// container. E.g., this is useful for building containers for enum types,
+// instead of using a hashed container.
+//
+// This supports operating on 'IntType' types with values ranging from [0,
+// MaxVals). The underlying bitset is of size 'MaxVals', which must be known at
+// compile time.
+//
+// Under the hood, std::bitset uses word-sized bitwise instructions on
+// stack-allocated bytes, so the expectation is that 'MaxVals' is not many
+// times larger than the word-size. A size limit of 64 bits is enforced at
+// compile time.
+template <typename IntType, size_t MaxVals>
+class FixedBitSet {
+ public:
+  // These types are exposed to match those provided by STL containers, which
+  // allows template instantiations to be used by map utility functions.
+  class iterator;
+  typedef IntType key_type;
+  typedef IntType value_type;
+
+  // Constructs an empty FixedBitSet.
+  FixedBitSet() {}
+
+  // Constructs a new FixedBitSet from an initializer list.
+  FixedBitSet(std::initializer_list<IntType> list) {
+    for (const IntType& val : list) {
+      insert(val);
+    }
+  }
+
+  // Constructs a new FixedBitSet from a container.
+  template <typename Container>
+  explicit FixedBitSet(const Container& c) {
+    for (const IntType& val : c) {
+      insert(val);
+    }
+  }
+
+  // Inserts 'val' into the set.
+  std::pair<iterator, bool> insert(const IntType& val) {
+    DCHECK_LT(val, MaxVals);
+    bool not_present = !contains(val);
+    if (not_present) {
+      bitset_.set(static_cast<size_t>(val));
+    }
+    return { iterator(this, static_cast<int>(val)), not_present };
+  }
+
+  // Removes 'val' from the set if it exists.
+  size_t erase(const IntType val) {
+    DCHECK_LT(val, MaxVals);
+    bool not_present = !contains(val);
+    if (not_present) {
+      return 0;
+    }
+    bitset_.set(static_cast<size_t>(val), false);
+    return 1;
+  }
+
+  // Returns whether 'val' exists in the set.
+  bool contains(const IntType& val) const {
+    DCHECK_LT(val, MaxVals);
+    return bitset_.test(val);
+  }
+
+  // Returns whether the set is empty.
+  bool empty() const {
+    return bitset_.none();
+  }
+
+  // Clears the contents of the set.
+  void clear() {
+    bitset_.reset();
+  }
+
+  // Returns the number of set bits.
+  size_t size() const {
+    return bitset_.count();
+  }
+
+  // Resets the set to have the contents of the container of int-typed values.
+  template <typename C>
+  void reset(const C& container) {
+    clear();
+    for (const IntType& item : container) {
+      insert(item);
+    }
+  }
+
+  // Forward iterator that points to the start of the values in the bitset.
+  // Points at the first set bit, or at end() if no bits are set.
+  iterator begin() const {
+    iterator iter(this, -1);
+    iter.seek_forward();
+    return iter;
+  }
+
+  // Forward iterator that points to the end of the values in the bitset.
+  iterator end() const {
+    return iterator(this, MaxVals);
+  }
+
+  // Forward iterator that points at the element 'val' if it exists, or at
+  // end() if it doesn't exist.
+  iterator find(const IntType& val) const {
+    return contains(val) ? iterator(this, val) : end();
+  }
+
+ private:
+  COMPILE_ASSERT(MaxVals < 64, bitset_size_too_large);
+  std::bitset<MaxVals> bitset_;
+};
+
+// Forward iterator class for a FixedBitSet.
+template <typename IntType, size_t MaxVals>
+class FixedBitSet<IntType, MaxVals>::iterator :
+    public std::iterator<std::forward_iterator_tag, IntType> {
+ public:
+  // Returns the value currently pointed at by this iterator.
+  IntType operator*() {
+    return static_cast<IntType>(idx_);
+  }
+
+  // Prefix increment operator. Advances the iterator to the next position and
+  // returns it.
+  iterator& operator++() {
+    seek_forward();
+    return *this;
+  }
+
+  // Postfix increment operator. Advances the iterator to the next position,
+  // returning a non-iterated iterator.
+  iterator operator++(int) {
+    iterator iter_copy = *this;
+    seek_forward();
+    return iter_copy;
+  }
+
+  // Returns whether this iterator is the same as 'other'.
+  bool operator==(const iterator& other) const {
+    return (idx_ == other.idx_) && (fbs_ == other.fbs_);
+  }
+
+  // Returns whether this iterator is not the same as 'other'.
+  bool operator!=(const iterator& other) const {
+    return !(*this == other);
+  }
+
+ private:
+  friend class FixedBitSet<IntType, MaxVals>;
+  iterator(const FixedBitSet<IntType, MaxVals>* fbs, int idx)
+      : fbs_(fbs),
+        idx_(idx) {}
+
+  // Seeks forward to the next set bit, or until at the end of the iterator.
+  void seek_forward() {
+    if (idx_ == MaxVals) {
+      return;
+    }
+    while (++idx_ < MaxVals) {
+      if (fbs_->contains(static_cast<IntType>(idx_))) {
+        break;
+      }
+    }
+  }
+
+  // The underlying FixedBitSet that this iterator is iterating over.
+  const FixedBitSet<IntType, MaxVals>* const fbs_;
+
+  // This iterator's current position.
+  int idx_;
+};
+


Mime
View raw message