kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [3/3] kudu git commit: KUDU-100 make RLE encoder handle 64-bit integer.
Date Sat, 29 Oct 2016 00:18:56 GMT
KUDU-100 make RLE encoder handle 64-bit integer.

Fix edge cases in bit stream.

Change-Id: I40974af3a9330cdfe4410c16293f330d0eccd03d
Reviewed-on: http://gerrit.cloudera.org:8080/4822
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/a1a8eef2
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/a1a8eef2
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/a1a8eef2

Branch: refs/heads/master
Commit: a1a8eef23a3cdab198cbae88a999cefb38ff2aea
Parents: fd89792
Author: honghaijei <honghaijie@gmail.com>
Authored: Mon Oct 24 10:49:27 2016 +0000
Committer: Todd Lipcon <todd@apache.org>
Committed: Sat Oct 29 00:16:57 2016 +0000

----------------------------------------------------------------------
 src/kudu/util/bit-stream-utils.inline.h | 17 ++++++------
 src/kudu/util/bit-util-test.cc          |  8 ++++++
 src/kudu/util/bit-util.h                | 12 ++++++++
 src/kudu/util/rle-test.cc               | 41 ++++++++++++++++++++++------
 4 files changed, 61 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/a1a8eef2/src/kudu/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/bit-stream-utils.inline.h b/src/kudu/util/bit-stream-utils.inline.h
index 6d6aad4..569197b 100644
--- a/src/kudu/util/bit-stream-utils.inline.h
+++ b/src/kudu/util/bit-stream-utils.inline.h
@@ -19,17 +19,18 @@
 
 #include <algorithm>
 
+#include "glog/logging.h"
 #include "kudu/util/bit-stream-utils.h"
 #include "kudu/util/alignment.h"
 
 namespace kudu {
 
 inline void BitWriter::PutValue(uint64_t v, int num_bits) {
-  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
-  DCHECK_LE(num_bits, 32);
+  DCHECK_LE(num_bits, 64);
   // Truncate the higher-order bits. This is necessary to
   // support signed values.
-  v &= (1ULL << num_bits) - 1;
+  v &= ~0ULL >> (64 - num_bits);
+
 
   buffered_values_ |= v << bit_offset_;
   bit_offset_ += num_bits;
@@ -43,7 +44,7 @@ inline void BitWriter::PutValue(uint64_t v, int num_bits) {
     buffered_values_ = 0;
     byte_offset_ += 8;
     bit_offset_ -= 64;
-    buffered_values_ = v >> (num_bits - bit_offset_);
+    buffered_values_ = BitUtil::ShiftRightZeroOnOverflow(v, (num_bits - bit_offset_));
   }
   DCHECK_LT(bit_offset_, 64);
 }
@@ -109,8 +110,7 @@ inline void BitReader::BufferValues() {
 
 template<typename T>
 inline bool BitReader::GetValue(int num_bits, T* v) {
-  // TODO: revisit this limit if necessary
-  DCHECK_LE(num_bits, 32);
+  DCHECK_LE(num_bits, 64);
   DCHECK_LE(num_bits, sizeof(T) * 8);
 
   if (PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return
false;
@@ -123,8 +123,9 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
     bit_offset_ -= 64;
     BufferValues();
     // Read bits of v that crossed into new buffered_values_
-    *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_)
-          << (num_bits - bit_offset_);
+    *v |= BitUtil::ShiftLeftZeroOnOverflow(
+        BitUtil::TrailingBits(buffered_values_, bit_offset_),
+        (num_bits - bit_offset_));
   }
   DCHECK_LE(bit_offset_, 64);
   return true;

http://git-wip-us.apache.org/repos/asf/kudu/blob/a1a8eef2/src/kudu/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/bit-util-test.cc b/src/kudu/util/bit-util-test.cc
index 8c43562..0d8eab4 100644
--- a/src/kudu/util/bit-util-test.cc
+++ b/src/kudu/util/bit-util-test.cc
@@ -32,6 +32,14 @@ TEST(BitUtil, TrailingBits) {
   EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 0), 0);
   EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 63), 0);
   EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 64), 1LL << 63);
+
+}
+
+TEST(BitUtil, ShiftBits) {
+  EXPECT_EQ(BitUtil::ShiftLeftZeroOnOverflow(1ULL, 64), 0ULL);
+  EXPECT_EQ(BitUtil::ShiftLeftZeroOnOverflow(0xFFFFFFFFFFFFFFFFULL, 32), 0xFFFFFFFF00000000ULL);
+  EXPECT_EQ(BitUtil::ShiftRightZeroOnOverflow(1ULL, 64), 0ULL);
+  EXPECT_EQ(BitUtil::ShiftRightZeroOnOverflow(0xFFFFFFFFFFFFFFFFULL, 32), 0x00000000FFFFFFFFULL);
 }
 
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/a1a8eef2/src/kudu/util/bit-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/bit-util.h b/src/kudu/util/bit-util.h
index 7f11f05..21f1558 100644
--- a/src/kudu/util/bit-util.h
+++ b/src/kudu/util/bit-util.h
@@ -38,6 +38,18 @@ class BitUtil {
     int n = 64 - num_bits;
     return (v << n) >> n;
   }
+
+  static inline uint64_t ShiftLeftZeroOnOverflow(uint64_t v, int num_bits) {
+    if (num_bits >= 64) return 0;
+    return v << num_bits;
+  }
+
+  static inline uint64_t ShiftRightZeroOnOverflow(uint64_t v, int num_bits) {
+    if (num_bits >= 64) return 0;
+    return v >> num_bits;
+  }
+
+
 };
 
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/a1a8eef2/src/kudu/util/rle-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/rle-test.cc b/src/kudu/util/rle-test.cc
index 3b71dc2..185fed5 100644
--- a/src/kudu/util/rle-test.cc
+++ b/src/kudu/util/rle-test.cc
@@ -35,7 +35,7 @@ using std::vector;
 
 namespace kudu {
 
-const int MAX_WIDTH = 32;
+const int MAX_WIDTH = 64;
 
 class TestRle : public KuduTest {};
 
@@ -205,7 +205,7 @@ void ValidateRle(const vector<T>& values, int bit_width,
 TEST(Rle, SpecificSequences) {
   const int kTestLen = 1024;
   uint8_t expected_buffer[kTestLen];
-  vector<int> values;
+  vector<uint64_t> values;
 
   // Test 50 0' followed by 50 1's
   values.resize(100);
@@ -251,10 +251,10 @@ TEST(Rle, SpecificSequences) {
 // ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value
 // is used, otherwise alternating values are used.
 void TestRleValues(int bit_width, int num_vals, int value = -1) {
-  const uint64_t mod = (bit_width == 64) ? 1 : 1LL << bit_width;
-  vector<int> values;
-  for (int v = 0; v < num_vals; ++v) {
-    values.push_back((value != -1) ? value : (v % mod));
+  const uint64_t mod = bit_width == 64 ? 1ULL : 1ULL << bit_width;
+  vector<uint64_t> values;
+  for (uint64_t v = 0; v < num_vals; ++v) {
+    values.push_back((value != -1) ? value : (bit_width == 64 ? v : (v % mod)));
   }
   ValidateRle(values, bit_width, nullptr, -1);
 }
@@ -301,14 +301,14 @@ TEST_F(BitRle, Flush) {
   ValidateRle(values, 1, nullptr, -1);
 }
 
-// Test some random sequences.
-TEST_F(BitRle, Random) {
+// Test some random bool sequences.
+TEST_F(BitRle, RandomBools) {
   int iters = 0;
   const int n_iters = AllowSlowTests() ? 1000 : 20;
   while (iters < n_iters) {
     srand(iters++);
     if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
-    vector<int> values;
+    vector<uint64_t > values;
     bool parity = 0;
     for (int i = 0; i < 1000; ++i) {
       int group_size = rand() % 20 + 1; // NOLINT(*)
@@ -324,6 +324,29 @@ TEST_F(BitRle, Random) {
   }
 }
 
+// Test some random 64-bit sequences.
+TEST_F(BitRle, Random64Bit) {
+  int iters = 0;
+  const int n_iters = AllowSlowTests() ? 1000 : 20;
+  while (iters < n_iters) {
+    srand(iters++);
+    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
+    vector<uint64_t > values;
+    for (int i = 0; i < 1000; ++i) {
+      int group_size = rand() % 20 + 1; // NOLINT(*)
+      uint64_t cur_value = (static_cast<uint64_t>(rand()) << 32) + static_cast<uint64_t>(rand());
+      if (group_size > 16) {
+        group_size = 1;
+      }
+      for (int i = 0; i < group_size; ++i) {
+        values.push_back(cur_value);
+      }
+
+    }
+    ValidateRle(values, 64, nullptr, -1);
+  }
+}
+
 // Test a sequence of 1 0's, 2 1's, 3 0's. etc
 // e.g. 011000111100000
 TEST_F(BitRle, RepeatedPattern) {


Mime
View raw message