kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mpe...@apache.org
Subject [1/2] kudu git commit: KUDU-2119. Fix failure in encoding-test
Date Wed, 06 Sep 2017 23:36:44 GMT
Repository: kudu
Updated Branches:
  refs/heads/master a71ecfd4a -> 4f41a3cb6


KUDU-2119. Fix failure in encoding-test

In commit d1f53cc32 I introduced randomization for the format string
used for the generated string data in this test. The random format
string could sometimes incorporate '\0' bytes, which, in the worst case,
could result in a string of length 0 or 1. This would then cause a later
assertion to fail that was checking that the encoded data be at least
two bytes per string.

The fix switches from using a printf-style string to instead use a
std::function to generate the data. The implementation of the function
avoids using C strings and thus permits embedded null bytes.

Change-Id: Ic46d2a993235e560475d931c7b023eb5b4faf7c2
Reviewed-on: http://gerrit.cloudera.org:8080/7967
Tested-by: Kudu Jenkins
Reviewed-by: Hao Hao <hao.hao@cloudera.com>
Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
Reviewed-by: Andrew Wong <awong@cloudera.com>
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2211afcb
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2211afcb
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2211afcb

Branch: refs/heads/master
Commit: 2211afcb3c283209c71e26c845bd9ad6c2a13119
Parents: a71ecfd
Author: Todd Lipcon <todd@apache.org>
Authored: Tue Sep 5 17:34:52 2017 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Wed Sep 6 22:20:03 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/encoding-test.cc | 67 +++++++++++++++++-------------------
 src/kudu/util/random_util.cc    | 13 ++++++-
 src/kudu/util/random_util.h     |  5 +++
 3 files changed, 48 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2211afcb/src/kudu/cfile/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc
index ba10ed4..9dcdcb4 100644
--- a/src/kudu/cfile/encoding-test.cc
+++ b/src/kudu/cfile/encoding-test.cc
@@ -22,6 +22,7 @@
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <functional>
 #include <limits>
 #include <memory>
 #include <ostream>
@@ -44,7 +45,6 @@
 #include "kudu/common/schema.h"
 #include "kudu/common/types.h"
 #include "kudu/gutil/gscoped_ptr.h"
-#include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -89,15 +89,17 @@ class TestEncoding : public KuduTest {
   }
 
   // Insert a given number of strings into the provided BlockBuilder.
+  //
+  // The strings are generated using the provided 'formatter' function.
   template<class BuilderType>
   static Slice CreateBinaryBlock(BuilderType *sbb,
                                  int num_items,
-                                 const char *fmt_str) {
-    vector<unique_ptr<string>> to_insert;
+                                 const std::function<string(int)>& formatter) {
+    vector<string> to_insert(num_items);
     vector<Slice> slices;
-    for (uint i = 0; i < num_items; i++) {
-      to_insert.emplace_back(new string(StringPrintf(fmt_str, i)));
-      slices.emplace_back(to_insert.back()->data());
+    for (int i = 0; i < num_items; i++) {
+      to_insert[i] = formatter(i);
+      slices.emplace_back(to_insert[i]);
     }
 
     int rem = slices.size();
@@ -126,7 +128,8 @@ class TestEncoding : public KuduTest {
     BuilderType sbb(opts.get());
     // Insert "hello 0" through "hello 9"
     const uint kCount = 10;
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d");
+    Slice s = CreateBinaryBlock(
+        &sbb, kCount, std::bind(StringPrintf, "hello %d", std::placeholders::_1));
     DecoderType sbd(s);
     ASSERT_OK(sbd.ParseHeader());
 
@@ -180,7 +183,8 @@ class TestEncoding : public KuduTest {
     BinaryPrefixBlockBuilder sbb(opts.get());
     const uint kCount = 1000;
     // Insert 'hello 000' through 'hello 999'
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %03d");
+    Slice s = CreateBinaryBlock(
+        &sbb, kCount, std::bind(StringPrintf, "hello %03d", std::placeholders::_1));
     BinaryPrefixBlockDecoder sbd(s);
     ASSERT_OK(sbd.ParseHeader());
 
@@ -251,44 +255,34 @@ class TestEncoding : public KuduTest {
     }
   }
 
-  // Create a printf-style format string of the form 'XXXXXXXX%d' where the 'X' characters
-  // are random bytes (not including '%', possibly including non-printable characters).
-  static string RandomFormatString(Random* r) {
-    char buf[8];
-    RandomString(buf, arraysize(buf), r);
-    string format_string(buf, arraysize(buf));
-    for (int i = 0; i < format_string.size(); i++) {
-      if (format_string[i] == '%') {
-        format_string[i] = 'x';
-      }
-    }
-    return format_string + "%d";
-  }
-
   template<class BuilderType, class DecoderType>
   void TestBinaryBlockRoundTrip() {
     gscoped_ptr<WriterOptions> opts(NewWriterOptions());
     BuilderType sbb(opts.get());
 
-    // Generate a random format string which uses some binary data.
+    auto seed = SeedRandom();
+    Random r(seed);
+
+    // For each row, generate random data based on this run's seed.
     // Using random data helps the ability to trigger bugs like KUDU-2085.
-    Random r(SeedRandom());
-    const string& format_string = RandomFormatString(&r);
-    const auto& GenTestString = [&](int x) {
-      return StringPrintf(format_string.c_str(), x);
+    const auto& GenTestString = [seed](int i) {
+      Random local_rng(seed + i);
+      int len = local_rng.Uniform(8);
+      return RandomString(len, &local_rng);
     };
 
-    // Use a random number of elements. This is necessary to trigger bugs
-    // like KUDU-2085 that only occur in specific cases such as when
-    // the number of elements is a multiple of the 'restart interval'
-    // in prefix-encoded blocks.
-    const uint kCount = r.Uniform(1000);
-    Slice s = CreateBinaryBlock(&sbb, kCount, format_string.c_str());
+    // Use a random number of elements (but at least 1).
+    //
+    // This is necessary to trigger bugs like KUDU-2085 that only occur in specific cases
+    // such as when the number of elements is a multiple of the 'restart interval' in
+    // prefix-encoded blocks.
+    const uint kCount = r.Uniform(1000) + 1;
+    Slice s = CreateBinaryBlock(&sbb, kCount, GenTestString);
 
     LOG(INFO) << "Block: " << HexDump(s);
 
-    // the slice should take at least a few bytes per entry
-    ASSERT_GT(s.size(), kCount * 2u);
+    // The encoded data should take at least 1 byte per entry.
+    ASSERT_GT(s.size(), kCount);
 
     // Check first/last keys
     Slice key;
@@ -480,7 +474,8 @@ class TestEncoding : public KuduTest {
     const uint kCount = 10;
     size_t sbsize;
 
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d");
+    Slice s = CreateBinaryBlock(
+        &sbb, kCount, std::bind(StringPrintf, "hello %d", std::placeholders::_1));
     do {
       sbsize = s.size();
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/2211afcb/src/kudu/util/random_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/random_util.cc b/src/kudu/util/random_util.cc
index a102a32..acd4084 100644
--- a/src/kudu/util/random_util.cc
+++ b/src/kudu/util/random_util.cc
@@ -21,10 +21,14 @@
 
 #include <cstdlib>
 #include <cstring>
+#include <string>
 
+#include "kudu/gutil/walltime.h"
 #include "kudu/util/env.h"
+#include "kudu/util/faststring.h"
 #include "kudu/util/random.h"
-#include "kudu/gutil/walltime.h"
+
+using std::string;
 
 namespace kudu {
 
@@ -42,6 +46,13 @@ void RandomString(void* dest, size_t n, Random* rng) {
   memcpy(cdest + i, &random, n - i);
 }
 
+string RandomString(size_t n, Random* rng) {
+  faststring s;
+  s.resize(n);
+  RandomString(s.data(), n, rng);
+  return s.ToString();
+}
+
 uint32_t GetRandomSeed32() {
   uint32_t seed = static_cast<uint32_t>(GetCurrentTimeMicros());
   seed *= getpid();

http://git-wip-us.apache.org/repos/asf/kudu/blob/2211afcb/src/kudu/util/random_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h
index e286bbe..bda8c42 100644
--- a/src/kudu/util/random_util.h
+++ b/src/kudu/util/random_util.h
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <stdint.h>
 
+#include <string>
+
 namespace kudu {
 
 class Random;
@@ -30,6 +32,9 @@ class Random;
 // be written to dest with the same probability as any other byte.
 void RandomString(void* dest, size_t n, Random* rng);
 
+// Same as the above, but returns the string.
+std::string RandomString(size_t n, Random* rng);
+
 // Generate a 32-bit random seed from several sources, including timestamp,
 // pid & tid.
 uint32_t GetRandomSeed32();


Mime
View raw message