kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject [2/2] kudu git commit: [c++] ColumnPredicate simplification for inequalities on boundary values
Date Wed, 30 Nov 2016 17:30:49 GMT
[c++] ColumnPredicate simplification for inequalities on boundary values

Adds special casing for inequality predicates on minimum and maximum
values. This tightens partition pruning in boundary cases. New tests are
added to cover the specific transformations. We already have good test
coverage of boundary values in predicate-test.

These changes uncovered a bug in type traits where the minimum value for
float types was defined as the lowest discrete value instead of
-infinity, after adding the predicate simplification multiple test cases
in predicate-test failed.

Change-Id: I9bb150249a51a69fb85af67d34e85b7d7bed325c
Reviewed-on: http://gerrit.cloudera.org:8080/5273
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/835cf6c1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/835cf6c1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/835cf6c1

Branch: refs/heads/master
Commit: 835cf6c1e38175c0bb019e62a294ed2bd9dc8102
Parents: d74888c
Author: Dan Burkert <danburkert@apache.org>
Authored: Tue Nov 29 18:45:43 2016 -0800
Committer: Dan Burkert <danburkert@apache.org>
Committed: Wed Nov 30 17:25:59 2016 +0000

----------------------------------------------------------------------
 src/kudu/common/column_predicate-test.cc | 108 ++++++++++++++++++++++++++
 src/kudu/common/column_predicate.cc      |  31 ++++++--
 src/kudu/common/partition_pruner-test.cc |   3 +-
 src/kudu/common/types.cc                 |   1 +
 src/kudu/common/types.h                  |  53 ++++++++++++-
 5 files changed, 184 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/column_predicate-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
index ba691f2..fcafcff 100644
--- a/src/kudu/common/column_predicate-test.cc
+++ b/src/kudu/common/column_predicate-test.cc
@@ -795,6 +795,114 @@ TEST_F(TestColumnPredicate, TestExclusive) {
   }
 }
 
+TEST_F(TestColumnPredicate, TestLess) {
+    ColumnSchema i8("i8", INT8);
+    ColumnSchema i16("i16", INT16);
+    ColumnSchema i32("i32", INT32);
+    ColumnSchema i64("i64", INT64);
+    ColumnSchema micros("micros", UNIXTIME_MICROS);
+    ColumnSchema f32("f32", FLOAT);
+    ColumnSchema f64("f64", DOUBLE);
+    ColumnSchema string("string", STRING);
+    ColumnSchema binary("binary", BINARY);
+
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(i8, nullptr, TypeTraits<INT8>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(i16, nullptr, TypeTraits<INT16>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(i32, nullptr, TypeTraits<INT32>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(i64, nullptr, TypeTraits<INT64>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(micros, nullptr, TypeTraits<UNIXTIME_MICROS>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(f32, nullptr, TypeTraits<FLOAT>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(f64, nullptr, TypeTraits<DOUBLE>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(string, nullptr, TypeTraits<STRING>::min_value())
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(binary, nullptr, TypeTraits<BINARY>::min_value())
+                              .predicate_type());
+}
+
+TEST_F(TestColumnPredicate, TestGreaterThanEquals) {
+    ColumnSchema i8("i8", INT8);
+    ColumnSchema i16("i16", INT16);
+    ColumnSchema i32("i32", INT32);
+    ColumnSchema i64("i64", INT64);
+    ColumnSchema micros("micros", UNIXTIME_MICROS);
+    ColumnSchema f32("f32", FLOAT);
+    ColumnSchema f64("f64", DOUBLE);
+    ColumnSchema string("string", STRING);
+    ColumnSchema binary("binary", BINARY);
+
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(i8, TypeTraits<INT8>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(i16, TypeTraits<INT16>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(i32, TypeTraits<INT32>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(i64, TypeTraits<INT64>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(micros, TypeTraits<UNIXTIME_MICROS>::min_value(),
nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(f32, TypeTraits<FLOAT>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(f64, TypeTraits<DOUBLE>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(string, TypeTraits<STRING>::min_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::IsNotNull,
+              ColumnPredicate::Range(binary, TypeTraits<BINARY>::min_value(), nullptr)
+                              .predicate_type());
+
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(i8, TypeTraits<INT8>::max_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(i16, TypeTraits<INT16>::max_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(i32, TypeTraits<INT32>::max_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(i64, TypeTraits<INT64>::max_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(micros, TypeTraits<UNIXTIME_MICROS>::max_value(),
nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(f32, TypeTraits<FLOAT>::max_value(), nullptr)
+                              .predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(f64, TypeTraits<DOUBLE>::max_value(), nullptr)
+                              .predicate_type());
+
+    Slice s = "foo";
+    ASSERT_EQ(PredicateType::Range,
+              ColumnPredicate::Range(string, &s, nullptr).predicate_type());
+    ASSERT_EQ(PredicateType::Range,
+              ColumnPredicate::Range(binary, &s, nullptr).predicate_type());
+}
+
 // Test the InList constructor.
 TEST_F(TestColumnPredicate, TestInList) {
     vector<const void*> values;

http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 28c182a..0e8c959 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -154,25 +154,40 @@ void ColumnPredicate::SetToNone() {
 }
 
 void ColumnPredicate::Simplify() {
-  // TODO(dan): we are missing some simplification opportunities here:
-  //    * range predicates including the entire range of a bool/integer
-  //      (`my_int8 >= -127`) can be simplified to `IS NOT NULL`.
-  //    * `IN` predicates including all values of a bool/integer
-  //      (`my_bool IN (true, false)`) can be simplified to `IS NOT NULL`.
+  auto type_info = column_.type_info();
   switch (predicate_type_) {
     case PredicateType::None:
     case PredicateType::Equality:
     case PredicateType::IsNotNull: return;
     case PredicateType::Range: {
+      DCHECK(lower_ != nullptr || upper_ != nullptr);
       if (lower_ != nullptr && upper_ != nullptr) {
-        if (column_.type_info()->Compare(lower_, upper_) >= 0) {
+        // _ <= VALUE < _
+        if (type_info->Compare(lower_, upper_) >= 0) {
           // If the range bounds are empty then no results can be returned.
           SetToNone();
-        } else if (column_.type_info()->AreConsecutive(lower_, upper_)) {
+        } else if (type_info->AreConsecutive(lower_, upper_)) {
           // If the values are consecutive, then it is an equality bound.
           predicate_type_ = PredicateType::Equality;
           upper_ = nullptr;
         }
+      } else if (lower_ != nullptr) {
+        // VALUE >= _
+        if (type_info->IsMinValue(lower_)) {
+          predicate_type_ = PredicateType::IsNotNull;
+          lower_ = nullptr;
+          upper_ = nullptr;
+        } else if (type_info->IsMaxValue(lower_)) {
+          predicate_type_ = PredicateType::Equality;
+          upper_ = nullptr;
+        }
+      } else if (upper_ != nullptr) {
+        // VALUE < _
+        if (type_info->IsMinValue(upper_)) {
+          predicate_type_ = PredicateType::None;
+          lower_ = nullptr;
+          upper_ = nullptr;
+        }
       }
       return;
     };
@@ -185,7 +200,7 @@ void ColumnPredicate::Simplify() {
         predicate_type_ = PredicateType::Equality;
         lower_ = values_[0];
         values_.clear();
-      } else if (column_.type_info()->type() == BOOL) {
+      } else if (type_info->type() == BOOL) {
         // If this is a boolean IN list with both true and false in the list,
         // then we can just convert it to IS NOT NULL. This same simplification
         // could be done for other integer types, but it's probably not as

http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/partition_pruner-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/partition_pruner-test.cc b/src/kudu/common/partition_pruner-test.cc
index b9c5fa2..0a0e8e9 100644
--- a/src/kudu/common/partition_pruner-test.cc
+++ b/src/kudu/common/partition_pruner-test.cc
@@ -353,8 +353,7 @@ TEST(TestPartitionPruner, TestRangePruning) {
   Check({ ColumnPredicate::Range(schema.column(2), nullptr, &hundred) }, 3);
 
   // c < MIN
-  // TODO(dan): this should prune all partitions.
-  Check({ ColumnPredicate::Range(schema.column(2), nullptr, &min) }, 1);
+  Check({ ColumnPredicate::Range(schema.column(2), nullptr, &min) }, 0);
 
   // c < MAX
   Check({ ColumnPredicate::Range(schema.column(2), nullptr, &max) }, 3);

http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/types.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/types.cc b/src/kudu/common/types.cc
index 9101353..ed20081 100644
--- a/src/kudu/common/types.cc
+++ b/src/kudu/common/types.cc
@@ -34,6 +34,7 @@ TypeInfo::TypeInfo(TypeTraitsClass t)
     name_(TypeTraitsClass::name()),
     size_(TypeTraitsClass::size),
     min_value_(TypeTraitsClass::min_value()),
+    max_value_(TypeTraitsClass::max_value()),
     append_func_(TypeTraitsClass::AppendDebugStringForValue),
     compare_func_(TypeTraitsClass::Compare),
     are_consecutive_func_(TypeTraitsClass::AreConsecutive) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/types.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h
index 46f0452..71df079 100644
--- a/src/kudu/common/types.h
+++ b/src/kudu/common/types.h
@@ -60,6 +60,12 @@ class TypeInfo {
   void CopyMinValue(void* dst) const {
     memcpy(dst, min_value_, size_);
   }
+  bool IsMinValue(const void* value) const {
+    return Compare(value, min_value_) == 0;
+  }
+  bool IsMaxValue(const void* value) const {
+    return max_value_ != nullptr && Compare(value, max_value_) == 0;
+  }
 
  private:
   friend class TypeInfoResolver;
@@ -70,6 +76,8 @@ class TypeInfo {
   const string name_;
   const size_t size_;
   const void* const min_value_;
+  // The maximum value of the type, or null if the type has no max value.
+  const void* const max_value_;
 
   typedef void (*AppendDebugFunc)(const void *, string *);
   const AppendDebugFunc append_func_;
@@ -133,6 +141,9 @@ struct DataTypeTraits<UINT8> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -154,6 +165,9 @@ struct DataTypeTraits<INT8> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -175,6 +189,9 @@ struct DataTypeTraits<UINT16> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -196,6 +213,9 @@ struct DataTypeTraits<INT16> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -217,6 +237,9 @@ struct DataTypeTraits<UINT32> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -238,6 +261,9 @@ struct DataTypeTraits<INT32> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -259,6 +285,9 @@ struct DataTypeTraits<UINT64> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -280,6 +309,9 @@ struct DataTypeTraits<INT64> {
   static const cpp_type* min_value() {
     return &MathLimits<cpp_type>::kMin;
   }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kMax;
+  }
 };
 
 template<>
@@ -299,7 +331,10 @@ struct DataTypeTraits<FLOAT> {
     return AreFloatsConsecutive<FLOAT>(a, b);
   }
   static const cpp_type* min_value() {
-    return &MathLimits<cpp_type>::kMin;
+    return &MathLimits<cpp_type>::kNegInf;
+  }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kPosInf;
   }
 };
 
@@ -320,7 +355,10 @@ struct DataTypeTraits<DOUBLE> {
     return AreFloatsConsecutive<DOUBLE>(a, b);
   }
   static const cpp_type* min_value() {
-    return &MathLimits<cpp_type>::kMin;
+    return &MathLimits<cpp_type>::kNegInf;
+  }
+  static const cpp_type* max_value() {
+    return &MathLimits<cpp_type>::kPosInf;
   }
 };
 
@@ -357,6 +395,9 @@ struct DataTypeTraits<BINARY> {
     static Slice s("");
     return &s;
   }
+  static const cpp_type* max_value() {
+    return nullptr;
+  }
 };
 
 template<>
@@ -379,6 +420,10 @@ struct DataTypeTraits<BOOL> {
     static bool b = false;
     return &b;
   }
+  static const cpp_type* max_value() {
+    static bool b = true;
+    return &b;
+  }
 };
 
 // Base class for types that are derived, that is that have some other type as the
@@ -403,6 +448,10 @@ struct DerivedTypeTraits {
   static const cpp_type* min_value() {
     return DataTypeTraits<PhysicalType>::min_value();
   }
+
+  static const cpp_type* max_value() {
+    return DataTypeTraits<PhysicalType>::max_value();
+  }
 };
 
 template<>


Mime
View raw message