arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [arrow] branch master updated: ARROW-1750: [C++] Remove the need for arrow/util/random.h
Date Mon, 06 Nov 2017 17:55:55 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0106f53  ARROW-1750: [C++] Remove the need for arrow/util/random.h
0106f53 is described below

commit 0106f531c04477b1c8bd088d097624ff43b44658
Author: Phillip Cloud <cpcloud@gmail.com>
AuthorDate: Mon Nov 6 12:55:49 2017 -0500

    ARROW-1750: [C++] Remove the need for arrow/util/random.h
    
    Author: Phillip Cloud <cpcloud@gmail.com>
    
    Closes #1283 from cpcloud/ARROW-1750 and squashes the following commits:
    
    3f6af737 [Phillip Cloud] ARROW-1750: [C++] Remove the need for arrow/util/random.h
---
 cpp/src/arrow/array-test.cc        |  93 ++++++++++------
 cpp/src/arrow/ipc/ipc-json-test.cc |   4 +-
 cpp/src/arrow/test-util.h          | 214 ++++++++++++++++++++++++++++---------
 cpp/src/arrow/util/CMakeLists.txt  |   1 -
 cpp/src/arrow/util/decimal.cc      |  11 +-
 cpp/src/arrow/util/decimal.h       |   5 +-
 cpp/src/arrow/util/random.h        | 126 ----------------------
 7 files changed, 236 insertions(+), 218 deletions(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 168ef10..9f248cd 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -263,6 +263,8 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_TRUE(result->Equals(*expected));
   }
 
+  int64_t FlipValue(int64_t value) const { return ~value; }
+
  protected:
   std::shared_ptr<DataType> type_;
   std::unique_ptr<BuilderType> builder_;
@@ -272,44 +274,64 @@ class TestPrimitiveBuilder : public TestBuilder {
   vector<uint8_t> valid_bytes_;
 };
 
-#define PTYPE_DECL(CapType, c_type)               \
-  typedef CapType##Array ArrayType;               \
-  typedef CapType##Builder BuilderType;           \
-  typedef CapType##Type Type;                     \
-  typedef c_type T;                               \
-                                                  \
-  static std::shared_ptr<DataType> type() {       \
-    return std::shared_ptr<DataType>(new Type()); \
-  }
+/// \brief uint8_t isn't a valid template parameter to uniform_int_distribution, so
+/// we use SampleType to determine which kind of integer to use to sample.
+template <typename T,
+          typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
+struct UniformIntSampleType {
+  using type = T;
+};
+
+template <>
+struct UniformIntSampleType<uint8_t> {
+  using type = uint16_t;
+};
+
+template <>
+struct UniformIntSampleType<int8_t> {
+  using type = int16_t;
+};
 
-#define PINT_DECL(CapType, c_type, LOWER, UPPER)    \
+#define PTYPE_DECL(CapType, c_type)     \
+  typedef CapType##Array ArrayType;     \
+  typedef CapType##Builder BuilderType; \
+  typedef CapType##Type Type;           \
+  typedef c_type T;                     \
+                                        \
+  static std::shared_ptr<DataType> type() { return std::make_shared<Type>(); }
+
+#define PINT_DECL(CapType, c_type)                                                       \
+  struct P##CapType {                                                                    \
+    PTYPE_DECL(CapType, c_type)                                                          \
+    static void draw(int64_t N, vector<T>* draws) {                                      \
+      using sample_type = typename UniformIntSampleType<c_type>::type;                   \
+      const T lower = std::numeric_limits<T>::min();                                     \
+      const T upper = std::numeric_limits<T>::max();                                     \
+      test::randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
+                    draws);                                                              \
+    }                                                                                    \
+  }
+
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)  \
   struct P##CapType {                               \
     PTYPE_DECL(CapType, c_type)                     \
     static void draw(int64_t N, vector<T>* draws) { \
-      test::randint<T>(N, LOWER, UPPER, draws);     \
+      test::random_real(N, 0, LOWER, UPPER, draws); \
     }                                               \
   }
 
-#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)     \
-  struct P##CapType {                                  \
-    PTYPE_DECL(CapType, c_type)                        \
-    static void draw(int64_t N, vector<T>* draws) {    \
-      test::random_real<T>(N, 0, LOWER, UPPER, draws); \
-    }                                                  \
-  }
-
-PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
-PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
-PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
-PINT_DECL(UInt64, uint64_t, 0, UINT64_MAX);
+PINT_DECL(UInt8, uint8_t);
+PINT_DECL(UInt16, uint16_t);
+PINT_DECL(UInt32, uint32_t);
+PINT_DECL(UInt64, uint64_t);
 
-PINT_DECL(Int8, int8_t, INT8_MIN, INT8_MAX);
-PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
-PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
-PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
+PINT_DECL(Int8, int8_t);
+PINT_DECL(Int16, int16_t);
+PINT_DECL(Int32, int32_t);
+PINT_DECL(Int64, int64_t);
 
-PFLOAT_DECL(Float, float, -1000, 1000);
-PFLOAT_DECL(Double, double, -1000, 1000);
+PFLOAT_DECL(Float, float, -1000.0f, 1000.0f);
+PFLOAT_DECL(Double, double, -1000.0, 1000.0);
 
 struct PBoolean {
   PTYPE_DECL(Boolean, uint8_t)
@@ -325,6 +347,11 @@ void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) {
 }
 
 template <>
+int64_t TestPrimitiveBuilder<PBoolean>::FlipValue(int64_t value) const {
+  return !value;
+}
+
+template <>
 void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>& builder,
                                            bool nullable) {
   int64_t size = builder->length();
@@ -454,8 +481,8 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
   const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
   // This should be true with a very high probability, but might introduce flakiness
   ASSERT_LT(first_valid_idx, size - 1);
-  draws[first_valid_idx] =
-      static_cast<T>(~*reinterpret_cast<int64_t*>(&draws[first_valid_idx]));
+  draws[first_valid_idx] = static_cast<T>(
+      this->FlipValue(*reinterpret_cast<int64_t*>(&draws[first_valid_idx])));
   ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array));
 
   // test normal equality
@@ -724,8 +751,8 @@ void CheckSliceApproxEquals() {
   vector<T> draws2;
 
   const uint32_t kSeed = 0;
-  test::random_real<T>(kSize, kSeed, 0, 100, &draws1);
-  test::random_real<T>(kSize, kSeed + 1, 0, 100, &draws2);
+  test::random_real(kSize, kSeed, 0.0, 100.0, &draws1);
+  test::random_real(kSize, kSeed + 1, 0.0, 100.0, &draws2);
 
   // Make the draws equal in the sliced segment, but unequal elsewhere (to
   // catch not using the slice offset)
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc
index f2dd9e7..a560f09 100644
--- a/cpp/src/arrow/ipc/ipc-json-test.cc
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -222,8 +222,8 @@ void MakeBatchArrays(const std::shared_ptr<Schema>& schema, const int num_rows,
   std::vector<int8_t> v1_values;
   std::vector<int32_t> v2_values;
 
-  test::randint<int8_t>(num_rows, 0, 100, &v1_values);
-  test::randint<int32_t>(num_rows, 0, 100, &v2_values);
+  test::randint(num_rows, 0, 100, &v1_values);
+  test::randint(num_rows, 0, 100, &v2_values);
 
   std::shared_ptr<Array> v1;
   ArrayFromVector<Int8Type, int8_t>(is_valid, v1_values, &v1);
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 044fb94..7306f57 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,6 +18,7 @@
 #ifndef ARROW_TEST_UTIL_H_
 #define ARROW_TEST_UTIL_H_
 
+#include <algorithm>
 #include <cstdint>
 #include <limits>
 #include <memory>
@@ -38,8 +39,8 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/random.h"
 
 #define ASSERT_RAISES(ENUM, expr) \
   do {                            \
@@ -47,7 +48,7 @@
     if (!s.Is##ENUM()) {          \
       FAIL() << s.ToString();     \
     }                             \
-  } while (0)
+  } while (false)
 
 #define ASSERT_OK(expr)         \
   do {                          \
@@ -55,7 +56,7 @@
     if (!s.ok()) {              \
       FAIL() << s.ToString();   \
     }                           \
-  } while (0)
+  } while (false)
 
 #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
 
@@ -63,15 +64,15 @@
   do {                          \
     ::arrow::Status s = (expr); \
     EXPECT_TRUE(s.ok());        \
-  } while (0)
+  } while (false)
 
 #define ABORT_NOT_OK(s)                  \
   do {                                   \
     ::arrow::Status _s = (s);            \
     if (ARROW_PREDICT_FALSE(!_s.ok())) { \
-      exit(-1);                          \
+      exit(EXIT_FAILURE);                \
     }                                    \
-  } while (0);
+  } while (false);
 
 namespace arrow {
 
@@ -79,27 +80,22 @@ using ArrayVector = std::vector<std::shared_ptr<Array>>;
 
 namespace test {
 
-template <typename T>
-void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
-  Random rng(random_seed());
-  uint64_t draw;
-  uint64_t span = upper - lower;
-  T val;
-  for (int64_t i = 0; i < N; ++i) {
-    draw = rng.Uniform64(span);
-    val = static_cast<T>(draw + lower);
-    out->push_back(val);
-  }
+template <typename T, typename U>
+void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
+  const int random_seed = 0;
+  std::mt19937 gen(random_seed);
+  std::uniform_int_distribution<T> d(lower, upper);
+  out->resize(N, static_cast<T>(0));
+  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
 }
 
-template <typename T>
+template <typename T, typename U>
 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
-                 std::vector<T>* out) {
+                 std::vector<U>* out) {
   std::mt19937 gen(seed);
   std::uniform_real_distribution<T> d(min_value, max_value);
-  for (int64_t i = 0; i < n; ++i) {
-    out->push_back(d(gen));
-  }
+  out->resize(n, static_cast<T>(0));
+  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
 }
 
 template <typename T>
@@ -115,7 +111,8 @@ inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* poo
 
   auto buffer = std::make_shared<PoolBuffer>(pool);
   RETURN_NOT_OK(buffer->Resize(nbytes));
-  memcpy(buffer->mutable_data(), values.data(), nbytes);
+  auto immutable_data = reinterpret_cast<const uint8_t*>(values.data());
+  std::copy(immutable_data, immutable_data + nbytes, buffer->mutable_data());
 
   *result = buffer;
   return Status::OK();
@@ -143,56 +140,173 @@ static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
 // Sets approximately pct_null of the first n bytes in null_bytes to zero
 // and the rest to non-zero (true) values.
 static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
-  Random rng(random_seed());
-  for (int64_t i = 0; i < n; ++i) {
-    null_bytes[i] = rng.NextDoubleFraction() > pct_null;
-  }
+  const int random_seed = 0;
+  std::mt19937 gen(random_seed);
+  std::uniform_real_distribution<double> d(0.0, 1.0);
+  std::generate(null_bytes, null_bytes + n,
+                [&d, &gen, &pct_null] { return d(gen) > pct_null; });
 }
 
 static inline void random_is_valid(int64_t n, double pct_null,
                                    std::vector<bool>* is_valid) {
-  Random rng(random_seed());
-  for (int64_t i = 0; i < n; ++i) {
-    is_valid->push_back(rng.NextDoubleFraction() > pct_null);
-  }
+  const int random_seed = 0;
+  std::mt19937 gen(random_seed);
+  std::uniform_real_distribution<double> d(0.0, 1.0);
+  is_valid->resize(n, false);
+  std::generate(is_valid->begin(), is_valid->end(),
+                [&d, &gen, &pct_null] { return d(gen) > pct_null; });
 }
 
 static inline void random_bytes(int64_t n, uint32_t seed, uint8_t* out) {
   std::mt19937 gen(seed);
-  std::uniform_int_distribution<int> d(0, 255);
+  std::uniform_int_distribution<int> d(0, std::numeric_limits<uint8_t>::max());
+  std::generate(out, out + n, [&d, &gen] { return static_cast<uint8_t>(d(gen) & 0xFF); });
+}
 
-  for (int64_t i = 0; i < n; ++i) {
-    out[i] = static_cast<uint8_t>(d(gen) & 0xFF);
+static void DecimalRange(int32_t precision, Decimal128* min_decimal,
+                         Decimal128* max_decimal) {
+  DCHECK_GE(precision, 1) << "decimal precision must be greater than or equal to 1, got "
+                          << precision;
+  DCHECK_LE(precision, 38) << "decimal precision must be less than or equal to 38, got "
+                           << precision;
+
+  switch (precision) {
+    case 1:
+    case 2:
+      *max_decimal = std::numeric_limits<int8_t>::max();
+      break;
+    case 3:
+    case 4:
+      *max_decimal = std::numeric_limits<int16_t>::max();
+      break;
+    case 5:
+    case 6:
+      *max_decimal = 8388607;
+      break;
+    case 7:
+    case 8:
+    case 9:
+      *max_decimal = std::numeric_limits<int32_t>::max();
+      break;
+    case 10:
+    case 11:
+      *max_decimal = 549755813887;
+      break;
+    case 12:
+    case 13:
+    case 14:
+      *max_decimal = 140737488355327;
+      break;
+    case 15:
+    case 16:
+      *max_decimal = 36028797018963967;
+      break;
+    case 17:
+    case 18:
+      *max_decimal = std::numeric_limits<int64_t>::max();
+      break;
+    case 19:
+    case 20:
+    case 21:
+      *max_decimal = Decimal128("2361183241434822606847");
+      break;
+    case 22:
+    case 23:
+      *max_decimal = Decimal128("604462909807314587353087");
+      break;
+    case 24:
+    case 25:
+    case 26:
+      *max_decimal = Decimal128("154742504910672534362390527");
+      break;
+    case 27:
+    case 28:
+      *max_decimal = Decimal128("39614081257132168796771975167");
+      break;
+    case 29:
+    case 30:
+    case 31:
+      *max_decimal = Decimal128("10141204801825835211973625643007");
+      break;
+    case 32:
+    case 33:
+      *max_decimal = Decimal128("2596148429267413814265248164610047");
+      break;
+    case 34:
+    case 35:
+      *max_decimal = Decimal128("664613997892457936451903530140172287");
+      break;
+    case 36:
+    case 37:
+    case 38:
+      *max_decimal = Decimal128("170141183460469231731687303715884105727");
+      break;
+    default:
+      DCHECK(false);
+      break;
   }
+
+  *min_decimal = ~(*max_decimal);
 }
 
-static inline void random_ascii(int64_t n, uint32_t seed, uint8_t* out) {
+class UniformDecimalDistribution {
+ public:
+  explicit UniformDecimalDistribution(int32_t precision) {
+    Decimal128 max_decimal;
+    Decimal128 min_decimal;
+    DecimalRange(precision, &min_decimal, &max_decimal);
+
+    const auto min_low = static_cast<int64_t>(min_decimal.low_bits());
+    const auto max_low = static_cast<int64_t>(max_decimal.low_bits());
+
+    const int64_t min_high = min_decimal.high_bits();
+    const int64_t max_high = max_decimal.high_bits();
+
+    using param_type = std::uniform_int_distribution<int64_t>::param_type;
+
+    lower_dist_.param(param_type(min_low, max_low));
+    upper_dist_.param(param_type(min_high, max_high));
+  }
+
+  template <typename Generator>
+  Decimal128 operator()(Generator& gen) {
+    return Decimal128(upper_dist_(gen), static_cast<uint64_t>(lower_dist_(gen)));
+  }
+
+ private:
+  // The lower bits distribution is intentionally int64_t.
+  // If it were uint64_t then the size of the interval [min_high, max_high] would be 0
+  // because min_high > max_high due to 2's complement.
+  // So, we generate the same range of bits using int64_t and then cast to uint64_t.
+  std::uniform_int_distribution<int64_t> lower_dist_;
+  std::uniform_int_distribution<int64_t> upper_dist_;
+};
+
+static inline void random_decimals(int64_t n, uint32_t seed, int32_t precision,
+                                   uint8_t* out) {
   std::mt19937 gen(seed);
-  std::uniform_int_distribution<int> d(65, 122);
+  UniformDecimalDistribution dist(precision);
 
-  for (int64_t i = 0; i < n; ++i) {
-    out[i] = static_cast<uint8_t>(d(gen) & 0xFF);
+  for (int64_t i = 0; i < n; ++i, out += 16) {
+    const Decimal128 value(dist(gen));
+    value.ToBytes(out);
   }
 }
 
-template <typename T>
-void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, T* out) {
+template <typename T, typename U>
+void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
   DCHECK(out || (n == 0));
   std::mt19937 gen(seed);
   std::uniform_int_distribution<T> d(min_value, max_value);
-  for (int64_t i = 0; i < n; ++i) {
-    out[i] = static_cast<T>(d(gen));
-  }
+  std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+static inline void random_ascii(int64_t n, uint32_t seed, uint8_t* out) {
+  rand_uniform_int(n, seed, static_cast<int32_t>('A'), static_cast<int32_t>('z'), out);
 }
 
 static inline int64_t null_count(const std::vector<uint8_t>& valid_bytes) {
-  int64_t result = 0;
-  for (size_t i = 0; i < valid_bytes.size(); ++i) {
-    if (valid_bytes[i] == 0) {
-      ++result;
-    }
-  }
-  return result;
+  return static_cast<int64_t>(std::count(valid_bytes.cbegin(), valid_bytes.cend(), '\0'));
 }
 
 Status MakeRandomInt32PoolBuffer(int64_t length, MemoryPool* pool,
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 5df5e74..7810a3b 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -38,7 +38,6 @@ install(FILES
   logging.h
   macros.h
   parallel.h
-  random.h
   rle-encoding.h
   sse-util.h
   stl.h
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 9d94bef..cc18025 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -43,14 +43,17 @@ Decimal128::Decimal128(const uint8_t* bytes)
 }
 
 std::array<uint8_t, 16> Decimal128::ToBytes() const {
-  const uint64_t raw[] = {BitUtil::ToLittleEndian(low_bits_),
-                          BitUtil::ToLittleEndian(static_cast<uint64_t>(high_bits_))};
-  const auto* raw_data = reinterpret_cast<const uint8_t*>(raw);
   std::array<uint8_t, 16> out{{0}};
-  std::copy(raw_data, raw_data + out.size(), out.begin());
+  ToBytes(out.data());
   return out;
 }
 
+void Decimal128::ToBytes(uint8_t* out) const {
+  DCHECK_NE(out, NULLPTR);
+  reinterpret_cast<uint64_t*>(out)[0] = BitUtil::ToLittleEndian(low_bits_);
+  reinterpret_cast<int64_t*>(out)[1] = BitUtil::ToLittleEndian(high_bits_);
+}
+
 static constexpr Decimal128 kTenTo36(static_cast<int64_t>(0xC097CE7BC90715),
                                      0xB34B9F1000000000);
 static constexpr Decimal128 kTenTo18(0xDE0B6B3A7640000);
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 487f222..a0423e9 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -102,13 +102,14 @@ class ARROW_EXPORT Decimal128 {
   Decimal128& operator>>=(uint32_t bits);
 
   /// \brief Get the high bits of the two's complement representation of the number.
-  int64_t high_bits() const { return high_bits_; }
+  inline int64_t high_bits() const { return high_bits_; }
 
   /// \brief Get the low bits of the two's complement representation of the number.
-  uint64_t low_bits() const { return low_bits_; }
+  inline uint64_t low_bits() const { return low_bits_; }
 
   /// \brief Return the raw bytes of the value in little-endian byte order.
   std::array<uint8_t, 16> ToBytes() const;
+  void ToBytes(uint8_t* out) const;
 
   /// \brief Convert the Decimal128 value to a base 10 decimal string with the given
   /// scale.
diff --git a/cpp/src/arrow/util/random.h b/cpp/src/arrow/util/random.h
deleted file mode 100644
index 2e05a73..0000000
--- a/cpp/src/arrow/util/random.h
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-// Moved from Kudu http://github.com/cloudera/kudu
-
-#ifndef ARROW_UTIL_RANDOM_H_
-#define ARROW_UTIL_RANDOM_H_
-
-#include <stdint.h>
-
-#include <cmath>
-
-namespace arrow {
-namespace internal {
-namespace random {
-
-static const uint32_t M = 2147483647L;  // 2^31-1
-const double kTwoPi = 6.283185307179586476925286;
-
-}  // namespace random
-}  // namespace internal
-
-// A very simple random number generator.  Not especially good at
-// generating truly random bits, but good enough for our needs in this
-// package. This implementation is not thread-safe.
-class Random {
- public:
-  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
-    // Avoid bad seeds.
-    if (seed_ == 0 || seed_ == internal::random::M) {
-      seed_ = 1;
-    }
-  }
-
-  // Next pseudo-random 32-bit unsigned integer.
-  // FIXME: This currently only generates 31 bits of randomness.
-  // The MSB will always be zero.
-  uint32_t Next() {
-    static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
-    // We are computing
-    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
-    //
-    // seed_ must not be zero or M, or else all subsequent computed values
-    // will be zero or M respectively.  For all other values, seed_ will end
-    // up cycling through every number in [1,M-1]
-    uint64_t product = seed_ * A;
-
-    // Compute (product % M) using the fact that ((x << 31) % M) == x.
-    seed_ = static_cast<uint32_t>((product >> 31) + (product & internal::random::M));
-    // The first reduction may overflow by 1 bit, so we may need to
-    // repeat.  mod == M is not possible; using > allows the faster
-    // sign-bit-based test.
-    if (seed_ > internal::random::M) {
-      seed_ -= internal::random::M;
-    }
-    return seed_;
-  }
-
-  // Alias for consistency with Next64
-  uint32_t Next32() { return Next(); }
-
-  // Next pseudo-random 64-bit unsigned integer.
-  // FIXME: This currently only generates 62 bits of randomness due to Next()
-  // only giving 31 bits of randomness. The 2 most significant bits will always
-  // be zero.
-  uint64_t Next64() {
-    uint64_t large = Next();
-    // Only shift by 31 bits so we end up with zeros in MSB and not scattered
-    // throughout the 64-bit word. This is due to the weakness in Next() noted
-    // above.
-    large <<= 31;
-    large |= Next();
-    return large;
-  }
-
-  // Returns a uniformly distributed value in the range [0..n-1]
-  // REQUIRES: n > 0
-  uint32_t Uniform(uint32_t n) { return Next() % n; }
-
-  // Alias for consistency with Uniform64
-  uint32_t Uniform32(uint32_t n) { return Uniform(n); }
-
-  // Returns a uniformly distributed 64-bit value in the range [0..n-1]
-  // REQUIRES: n > 0
-  uint64_t Uniform64(uint64_t n) { return Next64() % n; }
-
-  // Randomly returns true ~"1/n" of the time, and false otherwise.
-  // REQUIRES: n > 0
-  bool OneIn(int n) { return (Next() % n) == 0; }
-
-  // Skewed: pick "base" uniformly from range [0,max_log] and then
-  // return "base" random bits.  The effect is to pick a number in the
-  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
-  uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); }
-
-  // Creates a normal distribution variable using the
-  // Box-Muller transform. See:
-  // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
-  // Adapted from WebRTC source code at:
-  // webrtc/trunk/modules/video_coding/main/test/test_util.cc
-  double Normal(double mean, double std_dev) {
-    double uniform1 = (Next() + 1.0) / (internal::random::M + 1.0);
-    double uniform2 = (Next() + 1.0) / (internal::random::M + 1.0);
-    return (mean +
-            std_dev * sqrt(-2 * ::log(uniform1)) *
-                cos(internal::random::kTwoPi * uniform2));
-  }
-
-  // Return a random number between 0.0 and 1.0 inclusive.
-  double NextDoubleFraction() {
-    return Next() / static_cast<double>(internal::random::M + 1.0);
-  }
-
- private:
-  uint32_t seed_;
-};
-
-uint32_t random_seed() {
-  // TODO(wesm): use system time to get a reasonably random seed
-  return 0;
-}
-
-}  // namespace arrow
-
-#endif  // ARROW_UTIL_RANDOM_H_

-- 
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <commits@arrow.apache.org>'].

Mime
View raw message