kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [1/2] incubator-kudu git commit: Add ColumnPredicate class
Date Tue, 08 Mar 2016 02:39:03 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/master 0c9262ef5 -> 1cea8b7e9


Add ColumnPredicate class

This predicate type is meant to replace the current ScanPredicate class.
It's designed to encapsulate any type of predicate clause that can be applied to
a column. Right now it has Range, and Equality variants, but in the future could
be expanded to included a Compound variant to handle IN or conjunctive clauses.

ColumnPredicate will be integrated into the client and tablet in a followup
commit.

Change-Id: I72cac35d7a168f96c2ee52afd284489e99755e3f
Reviewed-on: http://gerrit.cloudera.org:8080/2137
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/eb71ec75
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/eb71ec75
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/eb71ec75

Branch: refs/heads/master
Commit: eb71ec75d139be2c8bc47a135d8cd034d873e1ca
Parents: 0c9262e
Author: Dan Burkert <dan@cloudera.com>
Authored: Thu Feb 4 21:15:14 2016 -0800
Committer: Dan Burkert <dan@cloudera.com>
Committed: Mon Mar 7 19:15:14 2016 +0000

----------------------------------------------------------------------
 src/kudu/common/CMakeLists.txt           |   2 +
 src/kudu/common/column_predicate-test.cc | 278 ++++++++++++++++++++++
 src/kudu/common/column_predicate.cc      | 322 ++++++++++++++++++++++++++
 src/kudu/common/column_predicate.h       | 194 ++++++++++++++++
 src/kudu/common/row_key-util.cc          |   4 +-
 src/kudu/common/row_key-util.h           |   4 +
 6 files changed, 802 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/common/CMakeLists.txt b/src/kudu/common/CMakeLists.txt
index 7cf9495..c5deff6 100644
--- a/src/kudu/common/CMakeLists.txt
+++ b/src/kudu/common/CMakeLists.txt
@@ -40,6 +40,7 @@ ADD_EXPORTABLE_LIBRARY(wire_protocol_proto
   NONLINK_DEPS ${WIRE_PROTOCOL_PROTO_TGTS})
 
 set(COMMON_SRCS
+  column_predicate.cc
   encoded_key.cc
   generic_iterators.cc
   id_mapping.cc
@@ -77,6 +78,7 @@ ADD_EXPORTABLE_LIBRARY(kudu_common
   DEPS ${COMMON_LIBS})
 
 set(KUDU_TEST_LINK_LIBS kudu_common ${KUDU_MIN_TEST_LIBS})
+ADD_KUDU_TEST(column_predicate-test)
 ADD_KUDU_TEST(encoded_key-test)
 ADD_KUDU_TEST(generic_iterators-test)
 ADD_KUDU_TEST(id_mapping-test)

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/column_predicate-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
new file mode 100644
index 0000000..274a774
--- /dev/null
+++ b/src/kudu/common/column_predicate-test.cc
@@ -0,0 +1,278 @@
+// Licensed to the Apache Software Foundation (ASF) under values[1]
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/common/column_predicate.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "kudu/common/schema.h"
+#include "kudu/common/types.h"
+#include "kudu/util/test_util.h"
+
+namespace kudu {
+
+class TestColumnPredicate : public KuduTest {
+ public:
+
+  // Test that when a is merged into b and vice versa, the result is equal to
+  // expected, and the resulting type is equal to type.
+  void TestMerge(const ColumnPredicate& a,
+                const ColumnPredicate& b,
+                const ColumnPredicate& expected,
+                PredicateType type) {
+    ColumnPredicate a_base(a);
+    ColumnPredicate b_base(b);
+
+    a_base.Merge(b);
+    b_base.Merge(a);
+
+    ASSERT_EQ(expected, a_base) << "expected: " << expected.ToString()
+                                << ", actual: " << a_base.ToString();
+    ASSERT_EQ(expected, b_base) << "expected: " << expected.ToString()
+                                << ", actual: " << b_base.ToString();
+    ASSERT_EQ(a_base, b_base)  << "expected: " << a_base.ToString()
+                              << ", actual: " << b_base.ToString();
+
+    ASSERT_EQ(expected.predicate_type(), type);
+    ASSERT_EQ(a_base.predicate_type(), type);
+    ASSERT_EQ(b_base.predicate_type(), type);
+  }
+
+  template <typename T>
+  void TestMergeCombinations(const ColumnSchema& column, vector<T> values) {
+    // Range + Range
+
+    // [--------) AND
+    // [--------)
+    // =
+    // [--------)
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[4]),
+              ColumnPredicate::Range(column, &values[0], &values[4]),
+              ColumnPredicate::Range(column, &values[0], &values[4]),
+              PredicateType::Range);
+
+    // [--------) AND
+    // [----)
+    // =
+    // [----)
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[4]),
+              ColumnPredicate::Range(column, &values[0], &values[2]),
+              ColumnPredicate::Range(column, &values[0], &values[2]),
+              PredicateType::Range);
+
+    // [--------) AND
+    //   [----)
+    // =
+    //   [----)
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[4]),
+              ColumnPredicate::Range(column, &values[1], &values[3]),
+              ColumnPredicate::Range(column, &values[1], &values[3]),
+              PredicateType::Range);
+
+    // [-----) AND
+    //   [------)
+    // =
+    //   [---)
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[3]),
+              ColumnPredicate::Range(column, &values[1], &values[4]),
+              ColumnPredicate::Range(column, &values[1], &values[3]),
+              PredicateType::Range);
+
+    // [--) AND
+    //    [---)
+    // =
+    // None
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[2]),
+              ColumnPredicate::Range(column, &values[2], &values[5]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // [--) AND
+    //       [---)
+    // =
+    // None
+    TestMerge(ColumnPredicate::Range(column, &values[0], &values[2]),
+              ColumnPredicate::Range(column, &values[4], &values[6]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // Range + Equality
+
+    //   [---) AND
+    // |
+    // =
+    // None
+    TestMerge(ColumnPredicate::Range(column, &values[3], &values[5]),
+              ColumnPredicate::Equality(column, &values[1]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // [---) AND
+    // |
+    // =
+    // |
+    TestMerge(ColumnPredicate::Range(column, &values[1], &values[5]),
+              ColumnPredicate::Equality(column, &values[1]),
+              ColumnPredicate::Equality(column, &values[1]),
+              PredicateType::Equality);
+
+    // [---) AND
+    //   |
+    // =
+    //   |
+    TestMerge(ColumnPredicate::Range(column, &values[1], &values[5]),
+              ColumnPredicate::Equality(column, &values[3]),
+              ColumnPredicate::Equality(column, &values[3]),
+              PredicateType::Equality);
+
+    // [---) AND
+    //     |
+    // =
+    // None
+    TestMerge(ColumnPredicate::Range(column, &values[1], &values[5]),
+              ColumnPredicate::Equality(column, &values[5]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+
+    // [---) AND
+    //       |
+    // =
+    // None
+    TestMerge(ColumnPredicate::Range(column, &values[1], &values[4]),
+              ColumnPredicate::Equality(column, &values[5]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // None
+
+    // None AND
+    // [----)
+    // =
+    // None
+    TestMerge(ColumnPredicate::None(column),
+              ColumnPredicate::Range(column, &values[1], &values[5]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // None AND
+    //  |
+    // =
+    // None
+    TestMerge(ColumnPredicate::None(column),
+              ColumnPredicate::Equality(column, &values[1]),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+
+    // None AND
+    // None
+    // =
+    // None
+    TestMerge(ColumnPredicate::None(column),
+              ColumnPredicate::None(column),
+              ColumnPredicate::None(column),
+              PredicateType::None);
+  }
+};
+
+TEST_F(TestColumnPredicate, TestMerge) {
+  TestMergeCombinations(ColumnSchema("c", INT8),
+                        vector<int8_t> { 0, 1, 2, 3, 4, 5, 6 });
+
+  TestMergeCombinations(ColumnSchema("c", INT32),
+                        vector<int32_t> { -100, -10, -1, 0, 1, 10, 100 });
+
+  TestMergeCombinations(ColumnSchema("c", STRING),
+                        vector<Slice> { "a", "b", "c", "d", "e", "f", "g" });
+
+  TestMergeCombinations(ColumnSchema("c", BINARY),
+                        vector<Slice> { Slice("", 0),
+                                        Slice("\0", 1),
+                                        Slice("\0\0", 2),
+                                        Slice("\0\0\0", 3),
+                                        Slice("\0\0\0\0", 4),
+                                        Slice("\0\0\0\0\0", 5),
+                                        Slice("\0\0\0\0\0\0", 6),
+                                      });
+}
+
+// Test that the range constructor handles equality and empty ranges.
+TEST_F(TestColumnPredicate, TestRangeConstructor) {
+  {
+    ColumnSchema column("c", INT32);
+    int32_t zero = 0;
+    int32_t one = 1;
+    int32_t two = 2;
+
+    ASSERT_EQ(PredicateType::Range,
+              ColumnPredicate::Range(column, &zero, &two).predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(column, &zero, &one).predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(column, &zero, &zero).predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(column, &one, &zero).predicate_type());
+  }
+  {
+    ColumnSchema column("c", STRING);
+    Slice zero("", 0);
+    Slice one("\0", 1);
+    Slice two("\0\0", 2);
+
+    ASSERT_EQ(PredicateType::Range,
+              ColumnPredicate::Range(column, &zero, &two).predicate_type());
+    ASSERT_EQ(PredicateType::Equality,
+              ColumnPredicate::Range(column, &zero, &one).predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(column, &zero, &zero).predicate_type());
+    ASSERT_EQ(PredicateType::None,
+              ColumnPredicate::Range(column, &one, &zero).predicate_type());
+  }
+}
+
+// Test that the inclusive range constructor handles transforming to exclusive
+// upper bound correctly.
+TEST_F(TestColumnPredicate, TestInclusiveRange) {
+  Arena arena(1024, 1024 * 1024);
+  {
+    ColumnSchema column("c", INT32);
+    int32_t zero = 0;
+    int32_t two = 2;
+    int32_t three = 3;
+    int32_t max = INT32_MAX;
+
+    ASSERT_EQ(ColumnPredicate::Range(column, &zero, &three),
+              ColumnPredicate::InclusiveRange(column, &zero, &two, &arena));
+    ASSERT_EQ(ColumnPredicate::Range(column, &zero, nullptr),
+              ColumnPredicate::InclusiveRange(column, &zero, &max, &arena));
+
+    ASSERT_EQ(boost::none, ColumnPredicate::InclusiveRange(column, nullptr, &max, &arena));
+  }
+  {
+    ColumnSchema column("c", STRING);
+    Slice zero("", 0);
+    Slice two("\0\0", 2);
+    Slice three("\0\0\0", 3);
+
+    ASSERT_EQ(ColumnPredicate::Range(column, &zero, &three),
+              ColumnPredicate::InclusiveRange(column, &zero, &two, &arena));
+  }
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
new file mode 100644
index 0000000..22c74ed
--- /dev/null
+++ b/src/kudu/common/column_predicate.cc
@@ -0,0 +1,322 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/common/column_predicate.h"
+
+#include <utility>
+
+#include "kudu/common/row_key-util.h"
+#include "kudu/common/rowblock.h"
+#include "kudu/common/schema.h"
+#include "kudu/common/types.h"
+#include "kudu/util/memory/arena.h"
+
+using std::move;
+
+namespace kudu {
+
+ColumnPredicate::ColumnPredicate(PredicateType predicate_type,
+                                 ColumnSchema column,
+                                 const void* lower,
+                                 const void* upper)
+    : predicate_type_(predicate_type),
+      column_(move(column)),
+      lower_(lower),
+      upper_(upper) {
+}
+
+ColumnPredicate ColumnPredicate::Equality(ColumnSchema column, const void* value) {
+  CHECK(value != nullptr);
+  return ColumnPredicate(PredicateType::Equality, move(column), value, nullptr);
+}
+
+ColumnPredicate ColumnPredicate::Range(ColumnSchema column,
+                                       const void* lower,
+                                       const void* upper) {
+  CHECK(lower != nullptr || upper != nullptr);
+  ColumnPredicate pred(PredicateType::Range, move(column), lower, upper);
+  pred.Simplify();
+  return pred;
+}
+
+boost::optional<ColumnPredicate> ColumnPredicate::InclusiveRange(ColumnSchema column,
+                                                                 const void* lower,
+                                                                 const void* upper,
+                                                                 Arena* arena) {
+  CHECK(lower != nullptr || upper != nullptr);
+
+  if (upper != nullptr) {
+    // Transform the upper bound to exclusive by incrementing it.
+    // Make a copy of the value before incrementing in case it's aliased.
+    size_t size = column.type_info()->size();
+    void*  buf = CHECK_NOTNULL(arena->AllocateBytes(size));
+    memcpy(buf, upper, size);
+    if (!row_key_util::IncrementCell(column, buf, arena)) {
+      if (lower == nullptr) {
+        return boost::none;
+      } else {
+        upper = nullptr;
+      }
+    } else {
+      upper = buf;
+    }
+  }
+  return ColumnPredicate::Range(move(column), lower, upper);
+}
+
+ColumnPredicate ColumnPredicate::None(ColumnSchema column) {
+  return ColumnPredicate(PredicateType::None, move(column), nullptr, nullptr);
+}
+
+void ColumnPredicate::SetToNone() {
+  predicate_type_ = PredicateType::None;
+  lower_ = nullptr;
+  upper_ = nullptr;
+}
+
+void ColumnPredicate::Simplify() {
+  switch (predicate_type_) {
+    case PredicateType::None: return;
+    case PredicateType::Equality: return;
+    case PredicateType::Range: {
+      if (lower_ != nullptr && upper_ != nullptr) {
+        if (column_.type_info()->Compare(lower_, upper_) >= 0) {
+          // If the range bounds are empty then no results can be returned.
+          SetToNone();
+        } else if (column_.type_info()->AreConsecutive(lower_, upper_)) {
+          // If the values are consecutive, then it is an equality bound.
+          predicate_type_ = PredicateType::Equality;
+          upper_ = nullptr;
+        }
+      }
+      return;
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+void ColumnPredicate::Merge(const ColumnPredicate& other) {
+  CHECK(column_.Equals(other.column_, false));
+  switch (predicate_type_) {
+    case PredicateType::None: return;
+    case PredicateType::Range: {
+      MergeIntoRange(other);
+      return;
+    };
+    case PredicateType::Equality: {
+      MergeIntoEquality(other);
+      return;
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+void ColumnPredicate::MergeIntoRange(const ColumnPredicate& other) {
+  CHECK(predicate_type_ == PredicateType::Range);
+
+  switch (other.predicate_type()) {
+    case PredicateType::None: {
+      SetToNone();
+      return;
+    };
+
+    case PredicateType::Range: {
+      // Set the lower bound to the larger of the two.
+      if (other.lower_ != nullptr &&
+          (lower_ == nullptr || column_.type_info()->Compare(lower_, other.lower_) <
0)) {
+        lower_ = other.lower_;
+      }
+
+      // Set the upper bound to the smaller of the two.
+      if (other.upper_ != nullptr &&
+          (upper_ == nullptr || column_.type_info()->Compare(upper_, other.upper_) >
0)) {
+        upper_ = other.upper_;
+      }
+
+      Simplify();
+      return;
+    };
+
+    case PredicateType::Equality: {
+      if (column_.type_info()->Compare(lower_, other.lower_) > 0 ||
+          column_.type_info()->Compare(upper_, other.lower_) <= 0) {
+        // The equality value does not fall in this range.
+        SetToNone();
+      } else {
+        predicate_type_ = PredicateType::Equality;
+        lower_ = other.lower_;
+        upper_ = nullptr;
+      }
+      return;
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+void ColumnPredicate::MergeIntoEquality(const ColumnPredicate& other) {
+  CHECK(predicate_type_ == PredicateType::Equality);
+
+  switch (other.predicate_type()) {
+    case PredicateType::None: {
+      SetToNone();
+      return;
+    }
+    case PredicateType::Range: {
+      if (column_.type_info()->Compare(lower_, other.lower_) < 0 ||
+          column_.type_info()->Compare(lower_, other.upper_) >= 0) {
+        // This equality value does not fall in the other range.
+        SetToNone();
+      }
+      return;
+    };
+    case PredicateType::Equality: {
+      if (column_.type_info()->Compare(lower_, other.lower_) != 0) {
+        SetToNone();
+      }
+      return;
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+namespace {
+template <typename P>
+void ApplyPredicate(const ColumnBlock& block, SelectionVector* sel, P p) {
+  if (block.is_nullable()) {
+    for (size_t i = 0; i < block.nrows(); i++) {
+      if (!sel->IsRowSelected(i)) continue;
+      const void *cell = block.nullable_cell_ptr(i);
+      if (cell == nullptr || !p(cell)) {
+        BitmapClear(sel->mutable_bitmap(), i);
+      }
+    }
+  } else {
+    for (size_t i = 0; i < block.nrows(); i++) {
+      if (!sel->IsRowSelected(i)) continue;
+      const void *cell = block.cell_ptr(i);
+      if (!p(cell)) {
+        BitmapClear(sel->mutable_bitmap(), i);
+      }
+    }
+  }
+}
+} // anonymous namespace
+
+void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector *sel) const
{
+  CHECK_NOTNULL(sel);
+
+  // The type-specific predicate is provided as a function template to
+  // ApplyPredicate in the hope that they are inlined.
+  //
+  // TODO: In the future we can improve this by also providing the type info as a
+  // template, so that the type-specific data comparisons can be inlined.
+  //
+  // Going a step further we could do runtime codegen to inline the
+  // lower/upper/equality bounds.
+
+  // TODO: equality predicates should use the bloomfilter if it's available.
+
+  switch (predicate_type()) {
+    case PredicateType::None: {
+      ApplyPredicate(block, sel, [] (const void*) {
+          return false;
+      });
+      return;
+    };
+    case PredicateType::Range: {
+      if (lower_ == nullptr) {
+        ApplyPredicate(block, sel, [this] (const void* cell) {
+            return column_.type_info()->Compare(cell, this->upper_) < 0;
+        });
+      } else if (upper_ == nullptr) {
+        ApplyPredicate(block, sel, [this] (const void* cell) {
+            return column_.type_info()->Compare(cell, this->lower_) >= 0;
+        });
+      } else {
+        ApplyPredicate(block, sel, [this] (const void* cell) {
+            return column_.type_info()->Compare(cell, this->upper_) < 0 &&
+                   column_.type_info()->Compare(cell, this->lower_) >= 0;
+        });
+      }
+      return;
+    };
+    case PredicateType::Equality: {
+        ApplyPredicate(block, sel, [this] (const void* cell) {
+            return column_.type_info()->Compare(cell, this->lower_) == 0;
+        });
+        return;
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+string ColumnPredicate::ToString() const {
+  switch (predicate_type()) {
+    case PredicateType::None: return strings::Substitute("`$0` NONE", column_.name());
+    case PredicateType::Range: {
+      if (lower_ == nullptr) {
+        return strings::Substitute("`$0` < $1", column_.name(), column_.Stringify(upper_));
+      } else if (upper_ == nullptr) {
+        return strings::Substitute("`$0` >= $1", column_.name(), column_.Stringify(lower_));
+      } else {
+        return strings::Substitute("`$0` >= $1 AND `$0` < $2",
+                                   column_.name(),
+                                   column_.Stringify(lower_),
+                                   column_.Stringify(upper_));
+      }
+    };
+    case PredicateType::Equality: {
+      return strings::Substitute("`$0` = $1", column_.name(), column_.Stringify(lower_));
+    };
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+
+bool ColumnPredicate::operator==(const ColumnPredicate& other) const {
+  if (!column_.Equals(other.column_, false)) { return false; }
+  if (predicate_type_ != other.predicate_type_) {
+    return false;
+  } else if (predicate_type_ == PredicateType::Equality) {
+    return column_.type_info()->Compare(lower_, other.lower_) == 0;
+  } else if (predicate_type_ == PredicateType::Range) {
+    return (lower_ == other.lower_ ||
+            (lower_ != nullptr && other.lower_ != nullptr &&
+             column_.type_info()->Compare(lower_, other.lower_) == 0)) &&
+           (upper_ == other.upper_ ||
+            (upper_ != nullptr && other.upper_ != nullptr &&
+             column_.type_info()->Compare(upper_, other.upper_) == 0));
+  } else {
+    return true;
+  }
+}
+
+namespace {
+int SelectivityRank(const ColumnPredicate& predicate) {
+  switch (predicate.predicate_type()) {
+    case PredicateType::None: return 0;
+    case PredicateType::Equality: return 1;
+    case PredicateType::Range: return 2;
+  }
+  LOG(FATAL) << "unknown predicate type";
+}
+} // anonymous namespace
+
+int SelectivityComparator(const ColumnPredicate& left, const ColumnPredicate& right)
{
+  return SelectivityRank(left) - SelectivityRank(right);
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/column_predicate.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
new file mode 100644
index 0000000..3bbc81a
--- /dev/null
+++ b/src/kudu/common/column_predicate.h
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <boost/optional.hpp>
+#include <string>
+
+#include "kudu/common/row_key-util.h"
+#include "kudu/common/schema.h"
+
+namespace kudu {
+
+class Arena;
+class ColumnBlock;
+class ColumnSchema;
+class SelectionVector;
+class TypeInfo;
+
+enum class PredicateType {
+  // A predicate which always evaluates to false.
+  None,
+
+  // A predicate which evaluates to true if the column value equals a known
+  // value.
+  Equality,
+
+  // A predicate which evaluates to true if the column value falls within a
+  // range.
+  Range,
+};
+
+// A predicate which can be evaluated over a block of column values.
+//
+// Predicates over the same column can be merged to create a conjunction of the
+// two constituent predicates.
+//
+// There are multiple types of column predicates, which have different behavior
+// when merging and evaluating.
+//
+// A ColumnPredicate does not own the data to which it points internally,
+// so its lifetime must be managed to make sure it does not reference invalid
+// data. Typically the lifetime of a ColumnPredicate will be tied to a scan (on
+// the client side), or a scan iterator (on the server side).
+class ColumnPredicate {
+ public:
+
+  // Creates a new equality predicate on the column and value.
+  //
+  // The value is not copied, and must outlive the returned predicate.
+  static ColumnPredicate Equality(ColumnSchema column, const void* value);
+
+  // Creates a new range column predicate from an inclusive lower bound and
+  // exclusive upper bound.
+  //
+  // The values are not copied, and must outlive the returned predicate.
+  //
+  // Either (but not both) of the bounds may be a nullptr to indicate an
+  // unbounded range on that end.
+  //
+  // The range will be simplified into an Equality or None predicate type if
+  // possible.
+  static ColumnPredicate Range(ColumnSchema column, const void* lower, const void* upper);
+
+  // Creates a new range column predicate from an inclusive lower bound and an
+  // inclusive upper bound.
+  //
+  // The values are not copied, and must outlive the returned predicate. The
+  // arena is used for allocating an incremented upper bound to transform the
+  // bound to a exclusive. The arena must outlive the returned predicate.
+  //
+  // If a normalized column predicate cannot be created, then boost::none will
+  // be returned. This indicates that the predicate would cover the entire
+  // column range.
+  static boost::optional<ColumnPredicate> InclusiveRange(ColumnSchema column,
+                                                         const void* lower,
+                                                         const void* upper,
+                                                         Arena* arena);
+
+  // Returns the type of this predicate.
+  PredicateType predicate_type() const {
+    return predicate_type_;
+  }
+
+  // Merge another predicate into this one.
+  //
+  // The other predicate must be on the same column.
+  //
+  // After a merge, this predicate will be the logical intersection of the
+  // original predicates.
+  //
+  // Data is not copied from the other predicate, so its data must continue to
+  // outlive the merged predicate.
+  void Merge(const ColumnPredicate& other);
+
+  // Evaluate the predicate on every row in the column block.
+  //
+  // This is evaluated as an 'AND' with the current contents of *sel:
+  // - If the predicate evaluates to false, sets the appropriate bit in the
+  //   selection vector to 0.
+  // - If the predicate evaluates to true, does not make any change to the
+  //   selection vector.
+  //
+  // On any rows where the current value of *sel is false, the predicate evaluation
+  // may be skipped.
+  //
+  // NOTE: the evaluation result is stored into '*sel' which may or may not be the
+  // same vector as block->selection_vector().
+  void Evaluate(const ColumnBlock& block, SelectionVector* sel) const;
+
+  // Print the predicate for debugging.
+  std::string ToString() const;
+
+  // Returns true if the column predicates are equivalent.
+  //
+  // Predicates over different columns are not equal.
+  bool operator==(const ColumnPredicate& other) const;
+
+  // Returns the raw lower bound value if this is a range predicate, or the
+  // equality value if this is an equality predicate.
+  const void* raw_lower() const {
+    return lower_;
+  }
+
+  // Returns the raw upper bound if this is a range predicate.
+  const void* raw_upper() const {
+    return upper_;
+  }
+
+  // Returns the column schema of the column on which this predicate applies.
+  const ColumnSchema& column() const {
+    return column_;
+  }
+
+ private:
+
+  friend class TestColumnPredicate;
+
+  // Creates a new column predicate.
+  ColumnPredicate(PredicateType predicate_type,
+                  ColumnSchema column,
+                  const void* lower,
+                  const void* upper);
+
+  // Creates a new predicate which matches no values.
+  static ColumnPredicate None(ColumnSchema column);
+
+  // Transition to a None predicate type.
+  void SetToNone();
+
+  // Simplifies this predicate if possible.
+  void Simplify();
+
+  // Merge another predicate into this Range predicate.
+  void MergeIntoRange(const ColumnPredicate& other);
+
+  // Merge another predicate into this Equality predicate.
+  void MergeIntoEquality(const ColumnPredicate& other);
+
+  // The type of this predicate.
+  PredicateType predicate_type_;
+
+  // The data type of the column. TypeInfo instances have a static lifetime.
+  ColumnSchema column_;
+
+  // The inclusive lower bound value if this is a Range predicate, or the
+  // equality value if this is an Equality predicate.
+  const void* lower_;
+
+  // The exclusive upper bound value if this is a Range predicate.
+  const void* upper_;
+};
+
+// Compares predicates according to selectivity. Predicates that match fewer
+// rows will sort before predicates that match more rows.
+//
+// TODO: this could be improved with a histogram of expected values.
+int SelectivityComparator(const ColumnPredicate& left, const ColumnPredicate& right);
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/row_key-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/row_key-util.cc b/src/kudu/common/row_key-util.cc
index 2629952..dadf74e 100644
--- a/src/kudu/common/row_key-util.cc
+++ b/src/kudu/common/row_key-util.cc
@@ -63,6 +63,8 @@ bool IncrementStringCell(void* cell_ptr, Arena* arena) {
   return true;
 }
 
+} // anonymous namespace
+
 bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena) {
   DataType type = col.type_info()->physical_type();
   switch (type) {
@@ -90,8 +92,6 @@ bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena)
{
 #undef HANDLE_TYPE
 }
 
-} // anonymous namespace
-
 void SetKeyToMinValues(ContiguousRow* row) {
   for (int i = 0; i < row->schema()->num_key_columns(); i++) {
     const ColumnSchema& col = row->schema()->column(i);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/eb71ec75/src/kudu/common/row_key-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/row_key-util.h b/src/kudu/common/row_key-util.h
index 558ca42..21df4ab 100644
--- a/src/kudu/common/row_key-util.h
+++ b/src/kudu/common/row_key-util.h
@@ -24,6 +24,7 @@
 namespace kudu {
 
 class Arena;
+class ColumnSchema;
 class ContiguousRow;
 
 namespace row_key_util {
@@ -68,6 +69,9 @@ bool IncrementKey(ContiguousRow* row, Arena* arena) WARN_UNUSED_RESULT;
 bool IncrementKeyPrefix(ContiguousRow* row, int prefix_len,
                         Arena* arena) WARN_UNUSED_RESULT;
 
+// Increments the provided cell in place.
+bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena);
+
 } // namespace row_key_util
 } // namespace kudu
 #endif /* KUDU_COMMON_ROW_KEY_UTIL_H */


Mime
View raw message