kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject kudu git commit: KUDU-2085. Fix crash when seeking past end of prefix-encoded blocks
Date Tue, 08 Aug 2017 01:33:55 GMT
Repository: kudu
Updated Branches:
  refs/heads/branch-1.4.x 9adfd2db4 -> b686a2675


KUDU-2085. Fix crash when seeking past end of prefix-encoded blocks

This fixes a bug when seeking past the last element of a prefix-encoded
binary block. In very specific circumstances, described in the JIRA,
this would cause a Status::Corruption() to be returned.

Prior to the fix, the updated test would fail pretty reliably when
looped 50-100 times. With the patch, I looped it 1000 times with no
failure.

Change-Id: I1670b2244d586e4920f0603c48f65ed993a3b12b
Reviewed-on: http://gerrit.cloudera.org:8080/7545
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <davidralves@gmail.com>
Reviewed-by: Andrew Wong <awong@cloudera.com>
(cherry picked from commit d1f53cc323d5d07e79304765fe171f535c1d548a)
Reviewed-on: http://gerrit.cloudera.org:8080/7611
Reviewed-by: Jean-Daniel Cryans <jdcryans@apache.org>
Tested-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b686a267
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b686a267
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b686a267

Branch: refs/heads/branch-1.4.x
Commit: b686a2675cfdd7867fb125fcefe250c34227c756
Parents: 9adfd2d
Author: Todd Lipcon <todd@apache.org>
Authored: Mon Jul 31 18:09:01 2017 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Tue Aug 8 01:32:40 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/binary_prefix_block.cc |  9 +++++-
 src/kudu/cfile/encoding-test.cc       | 48 +++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b686a267/src/kudu/cfile/binary_prefix_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/binary_prefix_block.cc b/src/kudu/cfile/binary_prefix_block.cc
index cc6eab7..cc2a5b8 100644
--- a/src/kudu/cfile/binary_prefix_block.cc
+++ b/src/kudu/cfile/binary_prefix_block.cc
@@ -288,6 +288,13 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint pos) {
 
   DCHECK_LE(pos, num_elems_);
 
+  // Seeking past the last element is valid -- it just sets us to the
+  // same state as if we have read all of the elements.
+  if (pos == num_elems_) {
+    cur_idx_ = pos;
+    return;
+  }
+
   int target_restart = pos/restart_interval_;
   SeekToRestartPoint(target_restart);
 
@@ -302,7 +309,7 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint pos) {
 // point. Note that the restart points in the file do not include
 // the '0' restart point, since that is simply the beginning of
 // the data and hence a waste of space. So, 'idx' may range from
-// 0 (first record) through num_restarts_ (last recorded restart point)
+// 0 (first record) through num_restarts_ (exclusive).
 const uint8_t * BinaryPrefixBlockDecoder::GetRestartPoint(uint32_t idx) const {
   DCHECK_LE(idx, num_restarts_);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b686a267/src/kudu/cfile/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc
index 6f5d7e1..001444f 100644
--- a/src/kudu/cfile/encoding-test.cc
+++ b/src/kudu/cfile/encoding-test.cc
@@ -37,6 +37,7 @@
 #include "kudu/util/hexdump.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/random.h"
+#include "kudu/util/random_util.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -70,8 +71,7 @@ class TestEncoding : public KuduTest {
     ASSERT_EQ(1, n);
   }
 
-  // Insert a given number of strings into the provided
-  // BinaryPrefixBlockBuilder.
+  // Insert a given number of strings into the provided BlockBuilder.
   template<class BuilderType>
   static Slice CreateBinaryBlock(BuilderType *sbb,
                                  int num_items,
@@ -234,12 +234,39 @@ class TestEncoding : public KuduTest {
     }
   }
 
+  // Create a printf-style format string of the form 'XXXXXXXX%d' where the 'X' characters
+  // are random bytes (not including '%', possibly including non-printable characters).
+  static string RandomFormatString(Random* r) {
+    char buf[8];
+    RandomString(buf, arraysize(buf), r);
+    string format_string(buf, arraysize(buf));
+    for (int i = 0; i < format_string.size(); i++) {
+      if (format_string[i] == '%') {
+        format_string[i] = 'x';
+      }
+    }
+    return format_string + "%d";
+  }
+
   template<class BuilderType, class DecoderType>
   void TestBinaryBlockRoundTrip() {
     gscoped_ptr<WriterOptions> opts(NewWriterOptions());
     BuilderType sbb(opts.get());
-    const uint kCount = 10;
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d");
+
+    // Generate a random format string which uses some binary data.
+    // Using random data helps the ability to trigger bugs like KUDU-2085.
+    Random r(SeedRandom());
+    const string& format_string = RandomFormatString(&r);
+    const auto& GenTestString = [&](int x) {
+      return StringPrintf(format_string.c_str(), x);
+    };
+
+    // Use a random number of elements. This is necessary to trigger bugs
+    // like KUDU-2085 that only occur in specific cases such as when
+    // the number of elements is a multiple of the 'restart interval'
+    // in prefix-encoded blocks.
+    const uint kCount = r.Uniform(1000);
+    Slice s = CreateBinaryBlock(&sbb, kCount, format_string.c_str());
 
     LOG(INFO) << "Block: " << HexDump(s);
 
@@ -249,9 +276,9 @@ class TestEncoding : public KuduTest {
     // Check first/last keys
     Slice key;
     ASSERT_OK(sbb.GetFirstKey(&key));
-    ASSERT_EQ("hello 0", key);
+    ASSERT_EQ(GenTestString(0), key);
     ASSERT_OK(sbb.GetLastKey(&key));
-    ASSERT_EQ(StringPrintf("hello %d", kCount - 1), key);
+    ASSERT_EQ(GenTestString(kCount - 1), key);
 
     DecoderType sbd(s);
     ASSERT_OK(sbd.ParseHeader());
@@ -266,7 +293,7 @@ class TestEncoding : public KuduTest {
       ASSERT_TRUE(sbd.HasNext()) << "Failed on iter " << i;
       Slice s;
       CopyOne<STRING>(&sbd, &s);
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, s.ToString()) << "failed at iter " << i;
     }
     ASSERT_FALSE(sbd.HasNext());
@@ -277,6 +304,11 @@ class TestEncoding : public KuduTest {
       ASSERT_EQ(i, sbd.GetCurrentIndex());
     }
 
+    // Test the special case of seeking to the end of the block.
+    sbd.SeekToPositionInBlock(kCount);
+    ASSERT_EQ(kCount, sbd.GetCurrentIndex());
+    ASSERT_FALSE(sbd.HasNext());
+
     // Try to request a bunch of data in one go
     ScopedColumnBlock<STRING> cb(kCount + 10);
     ColumnDataView cdv(&cb);
@@ -287,7 +319,7 @@ class TestEncoding : public KuduTest {
     ASSERT_FALSE(sbd.HasNext());
 
     for (uint i = 0; i < kCount; i++) {
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, cb[i].ToString());
     }
   }


Mime
View raw message