kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [1/7] incubator-kudu git commit: Add internal IS NOT NULL predicate type
Date Fri, 01 Apr 2016 03:16:03 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/master d5938e951 -> 626c3a839


Add internal IS NOT NULL predicate type

This commit adds an internal IS NOT NULL predicate type, and changes LESS_EQUAL
predicate simplification to result in the IS NOT NULL predicate type when the
conversion to a LESS predicate fails and the column is nullable. This fixes
scans with predicates such as 'WHERE my_nullable_int8_col <= 127'. A follow-up
commit will contain comprehensive predicate tests covering this case.

Change-Id: Ifcf29b1f274df2ef5c5ac7a7a17cc06dfd59e191
Reviewed-on: http://gerrit.cloudera.org:8080/2671
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/9b8b0202
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/9b8b0202
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/9b8b0202

Branch: refs/heads/master
Commit: 9b8b020210905b0cc4dd81e0ca905e098d0cad33
Parents: d5938e9
Author: Dan Burkert <dan@cloudera.com>
Authored: Wed Mar 30 15:29:10 2016 -0700
Committer: Dan Burkert <dan@cloudera.com>
Committed: Thu Mar 31 03:49:45 2016 +0000

----------------------------------------------------------------------
 src/kudu/client/scan_predicate.cc        |  2 +-
 src/kudu/client/scanner-internal.cc      |  4 ++
 src/kudu/common/column_predicate-test.cc | 61 +++++++++++++++++++++++++--
 src/kudu/common/column_predicate.cc      | 48 +++++++++++++++++++--
 src/kudu/common/column_predicate.h       |  6 +++
 src/kudu/common/common.proto             |  3 ++
 src/kudu/tserver/tablet_service.cc       |  4 ++
 7 files changed, 120 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/client/scan_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/scan_predicate.cc b/src/kudu/client/scan_predicate.cc
index 1520a0b..fc44ba7 100644
--- a/src/kudu/client/scan_predicate.cc
+++ b/src/kudu/client/scan_predicate.cc
@@ -60,10 +60,10 @@ ComparisonPredicateData::ComparisonPredicateData(ColumnSchema col,
       op_(op),
       val_(val) {
 }
+
 ComparisonPredicateData::~ComparisonPredicateData() {
 }
 
-
 Status ComparisonPredicateData::AddToScanSpec(ScanSpec* spec, Arena* arena) {
   void* val_void;
   RETURN_NOT_OK(val_->data_->CheckTypeAndGetPointer(col_.name(),

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/client/scanner-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/scanner-internal.cc b/src/kudu/client/scanner-internal.cc
index 568f381..3a3fc11 100644
--- a/src/kudu/client/scanner-internal.cc
+++ b/src/kudu/client/scanner-internal.cc
@@ -122,6 +122,10 @@ void ColumnPredicateIntoPB(const ColumnPredicate& predicate,
       }
       return;
     };
+    case PredicateType::IsNotNull: {
+      pb->mutable_is_not_null();
+      return;
+    };
     case PredicateType::None: LOG(FATAL) << "None predicate may not be converted to protobuf";
   }
   LOG(FATAL) << "unknown predicate type";

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/common/column_predicate-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
index 274a774..9585a24 100644
--- a/src/kudu/common/column_predicate-test.cc
+++ b/src/kudu/common/column_predicate-test.cc
@@ -188,20 +188,58 @@ class TestColumnPredicate : public KuduTest {
               ColumnPredicate::None(column),
               ColumnPredicate::None(column),
               PredicateType::None);
+
+    // IS NOT NULL
+
+    // IS NOT NULL AND
+    // IS NOT NULL
+    // =
+    // IS NOT NULL
+    TestMerge(ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::IsNotNull(column),
+              PredicateType::IsNotNull);
+
+    // IS NOT NULL AND
+    // None
+    // =
+    // None
+    TestMerge(ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::None(column),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // IS NOT NULL AND
+    // |
+    // =
+    // |
+    TestMerge(ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::Equality(column, &values[0]),
+              ColumnPredicate::Equality(column, &values[0]),
+              PredicateType::Equality);
+
+    // IS NOT NULL AND
+    // [------)
+    // =
+    // [------)
+    TestMerge(ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::Range(column, &values[0], &values[2]),
+              ColumnPredicate::Range(column, &values[0], &values[2]),
+              PredicateType::Range);
   }
 };
 
 TEST_F(TestColumnPredicate, TestMerge) {
-  TestMergeCombinations(ColumnSchema("c", INT8),
+  TestMergeCombinations(ColumnSchema("c", INT8, true),
                         vector<int8_t> { 0, 1, 2, 3, 4, 5, 6 });
 
-  TestMergeCombinations(ColumnSchema("c", INT32),
+  TestMergeCombinations(ColumnSchema("c", INT32, true),
                         vector<int32_t> { -100, -10, -1, 0, 1, 10, 100 });
 
-  TestMergeCombinations(ColumnSchema("c", STRING),
+  TestMergeCombinations(ColumnSchema("c", STRING, true),
                         vector<Slice> { "a", "b", "c", "d", "e", "f", "g" });
 
-  TestMergeCombinations(ColumnSchema("c", BINARY),
+  TestMergeCombinations(ColumnSchema("c", BINARY, true),
                         vector<Slice> { Slice("", 0),
                                         Slice("\0", 1),
                                         Slice("\0\0", 2),
@@ -265,6 +303,21 @@ TEST_F(TestColumnPredicate, TestInclusiveRange) {
     ASSERT_EQ(boost::none, ColumnPredicate::InclusiveRange(column, nullptr, &max, &arena));
   }
   {
+    ColumnSchema column("c", INT32, true);
+    int32_t zero = 0;
+    int32_t two = 2;
+    int32_t three = 3;
+    int32_t max = INT32_MAX;
+
+    ASSERT_EQ(ColumnPredicate::Range(column, &zero, &three),
+              ColumnPredicate::InclusiveRange(column, &zero, &two, &arena));
+    ASSERT_EQ(ColumnPredicate::Range(column, &zero, nullptr),
+              ColumnPredicate::InclusiveRange(column, &zero, &max, &arena));
+
+    ASSERT_EQ(ColumnPredicate::IsNotNull(column),
+              ColumnPredicate::InclusiveRange(column, nullptr, &max, &arena));
+  }
+  {
     ColumnSchema column("c", STRING);
     Slice zero("", 0);
     Slice two("\0\0", 2);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index d1c8c67..e7aef58 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -67,7 +67,14 @@ boost::optional<ColumnPredicate> ColumnPredicate::InclusiveRange(ColumnSchema co
     memcpy(buf, upper, size);
     if (!key_util::IncrementCell(column, buf, arena)) {
       if (lower == nullptr) {
-        return boost::none;
+        if (column.is_nullable()) {
+          // If incrementing the upper bound fails and the column is nullable,
+          // then return an IS NOT NULL predicate, so that null values will be
+          // filtered.
+          return ColumnPredicate::IsNotNull(move(column));
+        } else {
+          return boost::none;
+        }
       } else {
         upper = nullptr;
       }
@@ -78,6 +85,11 @@ boost::optional<ColumnPredicate> ColumnPredicate::InclusiveRange(ColumnSchema co
   return ColumnPredicate::Range(move(column), lower, upper);
 }
 
+ColumnPredicate ColumnPredicate::IsNotNull(ColumnSchema column) {
+  CHECK(column.is_nullable());
+  return ColumnPredicate(PredicateType::IsNotNull, move(column), nullptr, nullptr);
+}
+
 ColumnPredicate ColumnPredicate::None(ColumnSchema column) {
   return ColumnPredicate(PredicateType::None, move(column), nullptr, nullptr);
 }
@@ -90,8 +102,9 @@ void ColumnPredicate::SetToNone() {
 
 void ColumnPredicate::Simplify() {
   switch (predicate_type_) {
-    case PredicateType::None: return;
-    case PredicateType::Equality: return;
+    case PredicateType::None:
+    case PredicateType::Equality:
+    case PredicateType::IsNotNull: return;
     case PredicateType::Range: {
       if (lower_ != nullptr && upper_ != nullptr) {
         if (column_.type_info()->Compare(lower_, upper_) >= 0) {
@@ -121,6 +134,18 @@ void ColumnPredicate::Merge(const ColumnPredicate& other) {
       MergeIntoEquality(other);
       return;
     };
+    case PredicateType::IsNotNull: {
+      // NOT NULL is less selective than all other predicate types, so the
+      // intersection of NOT NULL with any other predicate is just the other
+      // predicate.
+      //
+      // Note: this will no longer be true when an IS NULL predicate type is
+      // added.
+      predicate_type_ = other.predicate_type_;
+      lower_ = other.lower_;
+      upper_ = other.upper_;
+      return;
+    };
   }
   LOG(FATAL) << "unknown predicate type";
 }
@@ -163,6 +188,7 @@ void ColumnPredicate::MergeIntoRange(const ColumnPredicate& other) {
       }
       return;
     };
+    case PredicateType::IsNotNull: return;
   }
   LOG(FATAL) << "unknown predicate type";
 }
@@ -189,6 +215,7 @@ void ColumnPredicate::MergeIntoEquality(const ColumnPredicate& other) {
       }
       return;
     };
+    case PredicateType::IsNotNull: return;
   }
   LOG(FATAL) << "unknown predicate type";
 }
@@ -260,6 +287,17 @@ void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector *sel) c
         });
         return;
     };
+    case PredicateType::IsNotNull: {
+      if (!block.is_nullable()) return;
+      // TODO: make this more efficient by using bitwise operations on the
+      // null and selection vectors.
+      for (size_t i = 0; i < block.nrows(); i++) {
+        if (sel->IsRowSelected(i) && block.is_null(i)) {
+          BitmapClear(sel->mutable_bitmap(), i);
+        }
+      }
+      return;
+    }
   }
   LOG(FATAL) << "unknown predicate type";
 }
@@ -282,6 +320,9 @@ string ColumnPredicate::ToString() const {
     case PredicateType::Equality: {
       return strings::Substitute("`$0` = $1", column_.name(), column_.Stringify(lower_));
     };
+    case PredicateType::IsNotNull: {
+      return strings::Substitute("`$0` IS NOT NULL", column_.name());
+    };
   }
   LOG(FATAL) << "unknown predicate type";
 }
@@ -310,6 +351,7 @@ int SelectivityRank(const ColumnPredicate& predicate) {
     case PredicateType::None: return 0;
     case PredicateType::Equality: return 1;
     case PredicateType::Range: return 2;
+    case PredicateType::IsNotNull: return 3;
   }
   LOG(FATAL) << "unknown predicate type";
 }

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/common/column_predicate.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
index c90d841..e1461e1 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -41,6 +41,9 @@ enum class PredicateType {
   // A predicate which evaluates to true if the column value falls within a
   // range.
   Range,
+
+  // A predicate which evaluates to true if the value is not null.
+  IsNotNull,
 };
 
 // A predicate which can be evaluated over a block of column values.
@@ -90,6 +93,9 @@ class ColumnPredicate {
                                                          const void* upper,
                                                          Arena* arena);
 
+  // Creates a new IS NOT NULL predicate for the column.
+  static ColumnPredicate IsNotNull(ColumnSchema column);
+
   // Returns the type of this predicate.
   PredicateType predicate_type() const {
     return predicate_type_;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/common/common.proto
----------------------------------------------------------------------
diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto
index a04f9b4..a3505bb 100644
--- a/src/kudu/common/common.proto
+++ b/src/kudu/common/common.proto
@@ -299,8 +299,11 @@ message ColumnPredicatePB {
     optional bytes value = 1;
   }
 
+  message IsNotNull {}
+
   oneof predicate {
     Range range = 2;
     Equality equality = 3;
+    IsNotNull is_not_null = 4;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9b8b0202/src/kudu/tserver/tablet_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 044b59f..1082e38 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -1310,6 +1310,10 @@ static Status SetupScanSpec(const NewScanRequestPB& scan_pb,
         ret->AddPredicate(ColumnPredicate::Equality(col, value));
         break;
       };
+      case ColumnPredicatePB::kIsNotNull: {
+        ret->AddPredicate(ColumnPredicate::IsNotNull(col));
+        break;
+      };
       default: return Status::InvalidArgument("Unknown predicate type for column", col.name());
     }
   }


Mime
View raw message