parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject parquet-cpp git commit: PARQUET-507: Reduce the runtime of rle-test
Date Sat, 06 Feb 2016 20:08:38 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 04d75c7cb -> a5892f52f


PARQUET-507: Reduce the runtime of rle-test

I twiddled this a bit to cut the runtime in half. I'd like to reduce it further, but I am looking
for feedback first -- my preference would be to use system entropy (`std::random_device`) to seed
the PRNG and print the seed on failure. That way we could run far fewer tests (e.g. only 50 or 100
or so) and occasionally run into flakiness or failure if we refactor and break something internally.
Thoughts?

Author: Wes McKinney <wes@cloudera.com>

Closes #37 from wesm/PARQUET-507 and squashes the following commits:

d75f2ed [Wes McKinney] Tidying per comments
0ed951a [Wes McKinney] Buglet
ba97491 [Wes McKinney] Further shorten random tests; use device entropy and print random seed
on failure
a357dd1 [Wes McKinney] Preallocate vector in BitRle.Random and run half as many iterations


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/a5892f52
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/a5892f52
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/a5892f52

Branch: refs/heads/master
Commit: a5892f52f4b328534e019116a110d318ac07e386
Parents: 04d75c7
Author: Wes McKinney <wes@cloudera.com>
Authored: Sat Feb 6 12:08:31 2016 -0800
Committer: Julien Le Dem <julien@dremio.com>
Committed: Sat Feb 6 12:08:31 2016 -0800

----------------------------------------------------------------------
 src/parquet/util/rle-test.cc | 59 +++++++++++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a5892f52/src/parquet/util/rle-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/rle-test.cc b/src/parquet/util/rle-test.cc
index b2628e9..df020f5 100644
--- a/src/parquet/util/rle-test.cc
+++ b/src/parquet/util/rle-test.cc
@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <cstdint>
 #include <iostream>
+#include <random>
 #include <vector>
 
 #include <boost/utility.hpp>
@@ -204,6 +205,32 @@ void ValidateRle(const vector<int>& values, int bit_width,
   }
 }
 
+// A version of ValidateRle that round-trips the values and returns false if
+// the returned values are not all the same
+bool CheckRoundTrip(const vector<int>& values, int bit_width) {
+  const int len = 64 * 1024;
+  uint8_t buffer[len];
+  RleEncoder encoder(buffer, len, bit_width);
+  for (int i = 0; i < values.size(); ++i) {
+    bool result = encoder.Put(values[i]);
+    if (!result) {
+      return false;
+    }
+  }
+  int encoded_len = encoder.Flush();
+  int out;
+
+  RleDecoder decoder(buffer, len, bit_width);
+  for (int i = 0; i < values.size(); ++i) {
+    uint64_t val;
+    bool result = decoder.Get(&out);
+    if (values[i] != out) {
+      return false;
+    }
+  }
+  return true;
+}
+
 TEST(Rle, SpecificSequences) {
   const int len = 1024;
   uint8_t expected_buffer[len];
@@ -317,15 +344,27 @@ TEST(BitRle, Flush) {
 
 // Test some random sequences.
 TEST(BitRle, Random) {
-  int iters = 0;
-  while (iters < 1000) {
-    srand(iters++);
-    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
-    vector<int> values;
+  size_t niters = 50;
+  size_t ngroups = 1000;
+  size_t max_group_size = 16;
+  vector<int> values(ngroups + max_group_size);
+
+  // prng setup
+  std::random_device rd;
+  std::uniform_int_distribution<int> dist(1, 20);
+
+  uint32_t seed = 0;
+  for (int iter = 0; iter < niters; ++iter) {
+    // generate a seed with device entropy
+    uint32_t seed = rd();
+    std::mt19937 gen(seed);
+
     bool parity = 0;
-    for (int i = 0; i < 1000; ++i) {
-      int group_size = rand() % 20 + 1;  // NOLINT
-      if (group_size > 16) {
+    values.resize(0);
+
+    for (int i = 0; i < ngroups; ++i) {
+      int group_size = dist(gen);
+      if (group_size > max_group_size) {
         group_size = 1;
       }
       for (int i = 0; i < group_size; ++i) {
@@ -333,7 +372,9 @@ TEST(BitRle, Random) {
       }
       parity = !parity;
     }
-    ValidateRle(values, (iters % MAX_WIDTH) + 1, NULL, -1);
+    if (!CheckRoundTrip(values, BitUtil::NumRequiredBits(values.size()))) {
+      FAIL() << "failing seed: " << seed;
+    }
   }
 }
 


Mime
View raw message