Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B8FCF200BE2 for ; Wed, 30 Nov 2016 18:30:50 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id B7D75160B06; Wed, 30 Nov 2016 17:30:50 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 90FFA160B13 for ; Wed, 30 Nov 2016 18:30:49 +0100 (CET) Received: (qmail 90307 invoked by uid 500); 30 Nov 2016 17:30:48 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 90272 invoked by uid 99); 30 Nov 2016 17:30:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 Nov 2016 17:30:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 95FCDDFC47; Wed, 30 Nov 2016 17:30:48 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: danburkert@apache.org To: commits@kudu.apache.org Date: Wed, 30 Nov 2016 17:30:49 -0000 Message-Id: <42caf44d3a4b4294a2b2a9815d3bfac0@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [2/2] kudu git commit: [c++] ColumnPredicate simplification for inequalities on boundary values archived-at: Wed, 30 Nov 2016 17:30:50 -0000 [c++] ColumnPredicate simplification for inequalities on boundary values Adds special casing for inequality predicates on minimum and maximum values. This tightens partition pruning in boundary cases. New tests are added to cover the specific transformations. We already have good test coverage of boundary values in predicate-test. These changes uncovered a bug in type traits where the minimum value for float types was defined as the lowest discrete value instead of -infinity, after adding the predicate simplification multiple test cases in predicate-test failed. Change-Id: I9bb150249a51a69fb85af67d34e85b7d7bed325c Reviewed-on: http://gerrit.cloudera.org:8080/5273 Reviewed-by: Adar Dembo Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/835cf6c1 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/835cf6c1 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/835cf6c1 Branch: refs/heads/master Commit: 835cf6c1e38175c0bb019e62a294ed2bd9dc8102 Parents: d74888c Author: Dan Burkert Authored: Tue Nov 29 18:45:43 2016 -0800 Committer: Dan Burkert Committed: Wed Nov 30 17:25:59 2016 +0000 ---------------------------------------------------------------------- src/kudu/common/column_predicate-test.cc | 108 ++++++++++++++++++++++++++ src/kudu/common/column_predicate.cc | 31 ++++++-- src/kudu/common/partition_pruner-test.cc | 3 +- src/kudu/common/types.cc | 1 + src/kudu/common/types.h | 53 ++++++++++++- 5 files changed, 184 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/column_predicate-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc index ba691f2..fcafcff 100644 --- a/src/kudu/common/column_predicate-test.cc +++ b/src/kudu/common/column_predicate-test.cc @@ -795,6 +795,114 @@ TEST_F(TestColumnPredicate, TestExclusive) { } } +TEST_F(TestColumnPredicate, TestLess) { + ColumnSchema i8("i8", INT8); + ColumnSchema i16("i16", INT16); + ColumnSchema i32("i32", INT32); + ColumnSchema i64("i64", INT64); + ColumnSchema micros("micros", UNIXTIME_MICROS); + ColumnSchema f32("f32", FLOAT); + ColumnSchema f64("f64", DOUBLE); + ColumnSchema string("string", STRING); + ColumnSchema binary("binary", BINARY); + + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(i8, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(i16, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(i32, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(i64, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(micros, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(f32, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(f64, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(string, nullptr, TypeTraits::min_value()) + .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(binary, nullptr, TypeTraits::min_value()) + .predicate_type()); +} + +TEST_F(TestColumnPredicate, TestGreaterThanEquals) { + ColumnSchema i8("i8", INT8); + ColumnSchema i16("i16", INT16); + ColumnSchema i32("i32", INT32); + ColumnSchema i64("i64", INT64); + ColumnSchema micros("micros", UNIXTIME_MICROS); + ColumnSchema f32("f32", FLOAT); + ColumnSchema f64("f64", DOUBLE); + ColumnSchema string("string", STRING); + ColumnSchema binary("binary", BINARY); + + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(i8, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(i16, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(i32, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(i64, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(micros, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(f32, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(f64, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(string, TypeTraits::min_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(binary, TypeTraits::min_value(), nullptr) + .predicate_type()); + + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(i8, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(i16, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(i32, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(i64, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(micros, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(f32, TypeTraits::max_value(), nullptr) + .predicate_type()); + ASSERT_EQ(PredicateType::Equality, + ColumnPredicate::Range(f64, TypeTraits::max_value(), nullptr) + .predicate_type()); + + Slice s = "foo"; + ASSERT_EQ(PredicateType::Range, + ColumnPredicate::Range(string, &s, nullptr).predicate_type()); + ASSERT_EQ(PredicateType::Range, + ColumnPredicate::Range(binary, &s, nullptr).predicate_type()); +} + // Test the InList constructor. TEST_F(TestColumnPredicate, TestInList) { vector values; http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/column_predicate.cc ---------------------------------------------------------------------- diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc index 28c182a..0e8c959 100644 --- a/src/kudu/common/column_predicate.cc +++ b/src/kudu/common/column_predicate.cc @@ -154,25 +154,40 @@ void ColumnPredicate::SetToNone() { } void ColumnPredicate::Simplify() { - // TODO(dan): we are missing some simplification opportunities here: - // * range predicates including the entire range of a bool/integer - // (`my_int8 >= -127`) can be simplified to `IS NOT NULL`. - // * `IN` predicates including all values of a bool/integer - // (`my_bool IN (true, false)`) can be simplified to `IS NOT NULL`. + auto type_info = column_.type_info(); switch (predicate_type_) { case PredicateType::None: case PredicateType::Equality: case PredicateType::IsNotNull: return; case PredicateType::Range: { + DCHECK(lower_ != nullptr || upper_ != nullptr); if (lower_ != nullptr && upper_ != nullptr) { - if (column_.type_info()->Compare(lower_, upper_) >= 0) { + // _ <= VALUE < _ + if (type_info->Compare(lower_, upper_) >= 0) { // If the range bounds are empty then no results can be returned. SetToNone(); - } else if (column_.type_info()->AreConsecutive(lower_, upper_)) { + } else if (type_info->AreConsecutive(lower_, upper_)) { // If the values are consecutive, then it is an equality bound. predicate_type_ = PredicateType::Equality; upper_ = nullptr; } + } else if (lower_ != nullptr) { + // VALUE >= _ + if (type_info->IsMinValue(lower_)) { + predicate_type_ = PredicateType::IsNotNull; + lower_ = nullptr; + upper_ = nullptr; + } else if (type_info->IsMaxValue(lower_)) { + predicate_type_ = PredicateType::Equality; + upper_ = nullptr; + } + } else if (upper_ != nullptr) { + // VALUE < _ + if (type_info->IsMinValue(upper_)) { + predicate_type_ = PredicateType::None; + lower_ = nullptr; + upper_ = nullptr; + } } return; }; @@ -185,7 +200,7 @@ void ColumnPredicate::Simplify() { predicate_type_ = PredicateType::Equality; lower_ = values_[0]; values_.clear(); - } else if (column_.type_info()->type() == BOOL) { + } else if (type_info->type() == BOOL) { // If this is a boolean IN list with both true and false in the list, // then we can just convert it to IS NOT NULL. This same simplification // could be done for other integer types, but it's probably not as http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/partition_pruner-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/common/partition_pruner-test.cc b/src/kudu/common/partition_pruner-test.cc index b9c5fa2..0a0e8e9 100644 --- a/src/kudu/common/partition_pruner-test.cc +++ b/src/kudu/common/partition_pruner-test.cc @@ -353,8 +353,7 @@ TEST(TestPartitionPruner, TestRangePruning) { Check({ ColumnPredicate::Range(schema.column(2), nullptr, &hundred) }, 3); // c < MIN - // TODO(dan): this should prune all partitions. - Check({ ColumnPredicate::Range(schema.column(2), nullptr, &min) }, 1); + Check({ ColumnPredicate::Range(schema.column(2), nullptr, &min) }, 0); // c < MAX Check({ ColumnPredicate::Range(schema.column(2), nullptr, &max) }, 3); http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/types.cc ---------------------------------------------------------------------- diff --git a/src/kudu/common/types.cc b/src/kudu/common/types.cc index 9101353..ed20081 100644 --- a/src/kudu/common/types.cc +++ b/src/kudu/common/types.cc @@ -34,6 +34,7 @@ TypeInfo::TypeInfo(TypeTraitsClass t) name_(TypeTraitsClass::name()), size_(TypeTraitsClass::size), min_value_(TypeTraitsClass::min_value()), + max_value_(TypeTraitsClass::max_value()), append_func_(TypeTraitsClass::AppendDebugStringForValue), compare_func_(TypeTraitsClass::Compare), are_consecutive_func_(TypeTraitsClass::AreConsecutive) { http://git-wip-us.apache.org/repos/asf/kudu/blob/835cf6c1/src/kudu/common/types.h ---------------------------------------------------------------------- diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h index 46f0452..71df079 100644 --- a/src/kudu/common/types.h +++ b/src/kudu/common/types.h @@ -60,6 +60,12 @@ class TypeInfo { void CopyMinValue(void* dst) const { memcpy(dst, min_value_, size_); } + bool IsMinValue(const void* value) const { + return Compare(value, min_value_) == 0; + } + bool IsMaxValue(const void* value) const { + return max_value_ != nullptr && Compare(value, max_value_) == 0; + } private: friend class TypeInfoResolver; @@ -70,6 +76,8 @@ class TypeInfo { const string name_; const size_t size_; const void* const min_value_; + // The maximum value of the type, or null if the type has no max value. + const void* const max_value_; typedef void (*AppendDebugFunc)(const void *, string *); const AppendDebugFunc append_func_; @@ -133,6 +141,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -154,6 +165,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -175,6 +189,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -196,6 +213,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -217,6 +237,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -238,6 +261,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -259,6 +285,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -280,6 +309,9 @@ struct DataTypeTraits { static const cpp_type* min_value() { return &MathLimits::kMin; } + static const cpp_type* max_value() { + return &MathLimits::kMax; + } }; template<> @@ -299,7 +331,10 @@ struct DataTypeTraits { return AreFloatsConsecutive(a, b); } static const cpp_type* min_value() { - return &MathLimits::kMin; + return &MathLimits::kNegInf; + } + static const cpp_type* max_value() { + return &MathLimits::kPosInf; } }; @@ -320,7 +355,10 @@ struct DataTypeTraits { return AreFloatsConsecutive(a, b); } static const cpp_type* min_value() { - return &MathLimits::kMin; + return &MathLimits::kNegInf; + } + static const cpp_type* max_value() { + return &MathLimits::kPosInf; } }; @@ -357,6 +395,9 @@ struct DataTypeTraits { static Slice s(""); return &s; } + static const cpp_type* max_value() { + return nullptr; + } }; template<> @@ -379,6 +420,10 @@ struct DataTypeTraits { static bool b = false; return &b; } + static const cpp_type* max_value() { + static bool b = true; + return &b; + } }; // Base class for types that are derived, that is that have some other type as the @@ -403,6 +448,10 @@ struct DerivedTypeTraits { static const cpp_type* min_value() { return DataTypeTraits::min_value(); } + + static const cpp_type* max_value() { + return DataTypeTraits::max_value(); + } }; template<>