kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [kudu] branch master updated: bitmap: faster implementation of iteration over a bitmap
Date Tue, 24 Mar 2020 17:25:52 GMT
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 5b290cd  bitmap: faster implementation of iteration over a bitmap
5b290cd is described below

commit 5b290cd4439d19ad8e5773cf69f7b994adc1f2e9
Author: Todd Lipcon <todd@apache.org>
AuthorDate: Mon Mar 23 13:18:42 2020 -0700

    bitmap: faster implementation of iteration over a bitmap
    
    This new implementation iterates 64 bits at a time instead of 8,
    and uses a templated callback function instead of using an "iterator"
    approach. In an upcoming patch, I found this method to be measurably
    faster than the previous TrueBitIterator code.
    
    I compared the included benchmark using the old TrueBitIterator vs the
    new implementation:
    
    Old:
     Performance counter stats for 'build/latest/bin/bitmap-test --gtest_filter=*Bench* --gtest_repeat=10':
    
              1,263.17 msec task-clock                #    0.998 CPUs utilized
                   254      context-switches          #    0.201 K/sec
                     0      cpu-migrations            #    0.000 K/sec
                 1,808      page-faults               #    0.001 M/sec
         3,561,984,981      cycles                    #    2.820 GHz
         5,878,012,694      instructions              #    1.65  insn per cycle
         1,301,336,934      branches                  # 1030.218 M/sec
            22,275,166      branch-misses             #    1.71% of all branches
    
           1.265635106 seconds time elapsed
    
           1.255785000 seconds user
           0.007992000 seconds sys
    
    new:
     Performance counter stats for 'build/latest/bin/bitmap-test --gtest_filter=*Bench* --gtest_repeat=10':
    
                595.63 msec task-clock                #    0.996 CPUs utilized
                   254      context-switches          #    0.426 K/sec
                     0      cpu-migrations            #    0.000 K/sec
                 1,808      page-faults               #    0.003 M/sec
         1,662,752,028      cycles                    #    2.792 GHz
         3,666,713,168      instructions              #    2.21  insn per cycle
           328,412,721      branches                  #  551.374 M/sec
             2,267,098      branch-misses             #    0.69% of all branches
    
           0.598305718 seconds time elapsed
    
           0.588236000 seconds user
           0.007962000 seconds sys
    
    Change-Id: Ibe772a7dd92faf9f99115148ad4cc7df542d1c76
    Reviewed-on: http://gerrit.cloudera.org:8080/15539
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <awong@cloudera.com>
---
 src/kudu/util/bitmap-test.cc |  80 +++++++++++++++++++++------
 src/kudu/util/bitmap.h       | 129 +++++++++++++++++++++++--------------------
 2 files changed, 134 insertions(+), 75 deletions(-)

diff --git a/src/kudu/util/bitmap-test.cc b/src/kudu/util/bitmap-test.cc
index b6a280a..c897736 100644
--- a/src/kudu/util/bitmap-test.cc
+++ b/src/kudu/util/bitmap-test.cc
@@ -19,26 +19,23 @@
 
 #include <cstdint>
 #include <cstring>
+#include <set>
 #include <vector>
 
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/strings/join.h"
+#include "kudu/util/faststring.h"
+#include "kudu/util/random.h"
+#include "kudu/util/random_util.h"
 
 namespace kudu {
 
-static int ReadBackBitmap(uint8_t *bm, size_t bits,
-                           std::vector<size_t> *result) {
-  int iters = 0;
-  for (TrueBitIterator iter(bm, bits);
-       !iter.done();
-       ++iter) {
-    size_t val = *iter;
-    result->push_back(val);
-
-    iters++;
-  }
-  return iters;
+static void ReadBackBitmap(uint8_t *bm, size_t bits,
+                          std::vector<size_t> *result) {
+  ForEachSetBit(bm, bits, [&](size_t b) {
+                            result->push_back(b);
+                          });
 }
 
 TEST(TestBitMap, TestIteration) {
@@ -56,8 +53,7 @@ TEST(TestBitMap, TestIteration) {
 
   std::vector<size_t> read_back;
 
-  int iters = ReadBackBitmap(bm, sizeof(bm)*8, &read_back);
-  ASSERT_EQ(6, iters);
+  ReadBackBitmap(bm, sizeof(bm)*8, &read_back);
   ASSERT_EQ("0,8,31,32,33,63", JoinElements(read_back, ","));
 }
 
@@ -69,11 +65,63 @@ TEST(TestBitMap, TestIteration2) {
 
   std::vector<size_t> read_back;
 
-  int iters = ReadBackBitmap(bm, 3, &read_back);
-  ASSERT_EQ(1, iters);
+  ReadBackBitmap(bm, 3, &read_back);
   ASSERT_EQ("1", JoinElements(read_back, ","));
 }
 
+struct RandomBitmap {
+  RandomBitmap(int n_bits, double set_ratio) {
+    bm.resize(BitmapSize(n_bits));
+    Random r(GetRandomSeed32());
+    for (int i = 0; i < n_bits; i++) {
+      bool set = r.NextDoubleFraction() < set_ratio;
+      BitmapChange(bm.data(), i, set);
+      if (set) {
+        set_bits.insert(i);
+      }
+    }
+  }
+
+  faststring bm;
+  std::set<int> set_bits;
+};
+
+TEST(TestBitMap, TestIterationRandom) {
+  Random r(GetRandomSeed32());
+  const auto kNumBits = 1 + r.Uniform(100);
+  RandomBitmap bm(kNumBits, r.NextDoubleFraction());
+
+  std::set<int> remaining = bm.set_bits;
+
+  // Iterate over the set bits and remove them from the set. When we're
+  // done we should have none left in the set.
+  ForEachSetBit(bm.bm.data(), kNumBits,
+                [&](size_t b) {
+                  EXPECT_EQ(1, remaining.erase(b));
+                });
+  EXPECT_EQ(remaining.size(), 0);
+}
+
+
+
+TEST(TestBitMap, Benchmark) {
+  static constexpr int kNumBits = 1000;
+  static constexpr int kNumTrials = 1000;
+  for (int frac = 0; frac <= 100; frac++) {
+    RandomBitmap bm(kNumBits, frac/100.0);
+
+    volatile int sink = 0;
+    for (int i = 0; i < kNumTrials; i++) {
+      int sum = 0;
+      ForEachSetBit(bm.bm.data(), kNumBits,
+                    [&](size_t b) {
+                      sum += b;
+                    });
+      sink += sum;
+    }
+  }
+}
+
 TEST(TestBitMap, TestSetAndTestBits) {
   uint8_t bm[1];
   memset(bm, 0, sizeof(bm));
diff --git a/src/kudu/util/bitmap.h b/src/kudu/util/bitmap.h
index c29ba09..0c67970 100644
--- a/src/kudu/util/bitmap.h
+++ b/src/kudu/util/bitmap.h
@@ -176,72 +176,83 @@ class BitmapIterator {
   const uint8_t *map_;
 };
 
-// Iterator which yields the set bits in a bitmap.
-// Example usage:
-//   for (TrueBitIterator iter(bitmap, n_bits);
-//        !iter.done();
-//        ++iter) {
-//     int next_onebit_position = *iter;
-//   }
-class TrueBitIterator {
- public:
-  TrueBitIterator(const uint8_t *bitmap, size_t n_bits)
-    : bitmap_(bitmap),
-      cur_byte_(0),
-      cur_byte_idx_(0),
-      n_bits_(n_bits),
-      n_bytes_(BitmapSize(n_bits_)),
-      bit_idx_(0) {
-    if (n_bits_ == 0) {
-      cur_byte_idx_ = 1; // sets done
-    } else {
-      cur_byte_ = bitmap[0];
-      AdvanceToNextOneBit();
+// Iterate over the bits in 'bitmap' and call 'func' for each set bit.
+// 'func' should take a single argument which is the bit's index.
+template<class F>
+void ForEachSetBit(const uint8_t* __restrict__ bitmap,
+                   int n_bits,
+                   const F& func);
+
+// Iterate over the bits in 'bitmap' and call 'func' for each unset bit.
+// 'func' should take a single argument which is the bit's index.
+template<class F>
+void ForEachUnsetBit(const uint8_t* __restrict__ bitmap,
+                     int n_bits,
+                     const F& func);
+
+
+////////////////////////////////////////////////////////////
+// Implementation details
+////////////////////////////////////////////////////////////
+
+template<bool SET, class F>
+inline void ForEachBit(const uint8_t* bitmap,
+                       int n_bits,
+                       const F& func) {
+  int rem = n_bits;
+  int base_idx = 0;
+  while (rem >= 64) {
+    uint64_t w = UnalignedLoad<uint64_t>(bitmap);
+    if (!SET) {
+      w = ~w;
     }
+    bitmap += sizeof(uint64_t);
+
+    // Within each word, keep flipping the least significant non-zero bit and adding
+    // the bit index to the output until none are set.
+    //
+    // Get the count up front so that the loop can be unrolled without dependencies.
+    // The 'tzcnt' instruction that's generated here has a latency of 3 so unrolling
+    // and avoiding any cross-iteration dependencies is beneficial.
+    int tot_count = Bits::CountOnes64withPopcount(w);
+#pragma unroll(3)
+    while (tot_count--) {
+      func(base_idx + Bits::FindLSBSetNonZero64(w));
+      w &= w - 1;
+    }
+    base_idx += 64;
+    rem -= 64;
   }
 
-  TrueBitIterator &operator ++() {
-    DCHECK(!done());
-    DCHECK(cur_byte_ & 1);
-    cur_byte_ &= (~1);
-    AdvanceToNextOneBit();
-    return *this;
-  }
-
-  bool done() const {
-    return cur_byte_idx_ >= n_bytes_;
-  }
-
-  size_t operator *() const {
-    DCHECK(!done());
-    return bit_idx_;
-  }
-
- private:
-  void AdvanceToNextOneBit() {
-    while (cur_byte_ == 0) {
-      cur_byte_idx_++;
-      if (cur_byte_idx_ >= n_bytes_) return;
-      cur_byte_ = bitmap_[cur_byte_idx_];
-      bit_idx_ = cur_byte_idx_ * 8;
+  while (rem > 0) {
+    uint8_t b = *bitmap++;
+    if (!SET) {
+      b = ~b;
     }
-    DVLOG(2) << "Found next nonzero byte at " << cur_byte_idx_
-             << " val=" << cur_byte_;
-
-    DCHECK_NE(cur_byte_, 0);
-    int set_bit = Bits::FindLSBSetNonZero(cur_byte_);
-    bit_idx_ += set_bit;
-    cur_byte_ >>= set_bit;
+    while (b) {
+      int idx = base_idx + Bits::FindLSBSetNonZero(b);
+      if (idx >= n_bits) break;
+      func(idx);
+      b &= b - 1;
+    }
+    base_idx += 8;
+    rem -= 8;
   }
+}
 
-  const uint8_t *bitmap_;
-  uint8_t cur_byte_;
-  uint8_t cur_byte_idx_;
+template<class F>
+inline void ForEachSetBit(const uint8_t* __restrict__ bitmap,
+                          int n_bits,
+                          const F& func) {
+  ForEachBit<true, F>(bitmap, n_bits, func);
+}
 
-  const size_t n_bits_;
-  const size_t n_bytes_;
-  size_t bit_idx_;
-};
+template<class F>
+inline void ForEachUnsetBit(const uint8_t* __restrict__ bitmap,
+                            int n_bits,
+                            const F& func) {
+  ForEachBit<false, F>(bitmap, n_bits, func);
+}
 
 } // namespace kudu
 


Mime
View raw message