kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject kudu git commit: Tightening ScanSpec primary bounds when range predicate exists
Date Fri, 06 Jan 2017 22:25:54 GMT
Repository: kudu
Updated Branches:
  refs/heads/master e126bde8d -> 443b724e8


Tightening ScanSpec primary bounds when range predicate exists

Currently, push upper bound key predicates will break when meeting
a range predicate.
I think the upper bound key can be decreased.
For example, If PK = (a, b) and a < 2 && b < 3, PK can be further
tightning to PK < (1, 3).

Change-Id: I931a617605434700352d285fc2039033cf9eb07e
Reviewed-on: http://gerrit.cloudera.org:8080/5360
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <danburkert@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/443b724e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/443b724e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/443b724e

Branch: refs/heads/master
Commit: 443b724e8858bc88747cbf33ec34afd5dbd3be2a
Parents: e126bde
Author: honghaijei <honghaijie@gmail.com>
Authored: Mon Dec 5 12:50:21 2016 +0000
Committer: Dan Burkert <danburkert@apache.org>
Committed: Fri Jan 6 22:25:39 2017 +0000

----------------------------------------------------------------------
 src/kudu/common/key_util-test.cc  |  57 +++++++++++++++
 src/kudu/common/key_util.cc       | 122 ++++++++++++++++++++++++++++-----
 src/kudu/common/key_util.h        |   3 +
 src/kudu/common/scan_spec-test.cc |  61 +++++++++++++++--
 4 files changed, 221 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util-test.cc b/src/kudu/common/key_util-test.cc
index 70dc181..b6c71f3 100644
--- a/src/kudu/common/key_util-test.cc
+++ b/src/kudu/common/key_util-test.cc
@@ -130,4 +130,61 @@ TEST_F(KeyUtilTest, TestIncrementCompositeStringIntPrimaryKey) {
   EXPECT_EQ(R"(string k1="hello\000", int32 k2=-2147483648)", p_row.ToString());
 }
 
+TEST_F(KeyUtilTest, TestTryDecrementCell) {
+  {
+    ColumnSchema col_uint32("a", UINT32);
+    uint32_t orig = 12;
+    EXPECT_EQ(key_util::TryDecrementCell(col_uint32, &orig), true);
+    EXPECT_EQ(orig, 11);
+  }
+  {
+    ColumnSchema col_uint32("a", UINT32);
+    uint32_t orig = 0;
+    EXPECT_EQ(key_util::TryDecrementCell(col_uint32, &orig), false);
+    EXPECT_EQ(orig, 0);
+  }
+  {
+    ColumnSchema col_int32("a", INT32);
+    int32_t orig = 0;
+    EXPECT_EQ(key_util::TryDecrementCell(col_int32, &orig), true);
+    EXPECT_EQ(orig, -1);
+  }
+  {
+    ColumnSchema col_int32("a", INT32);
+    int32_t orig = numeric_limits<int32_t>::min();
+    EXPECT_EQ(key_util::TryDecrementCell(col_int32, &orig), false);
+    EXPECT_EQ(orig, numeric_limits<int32_t>::min());
+  }
+  {
+    ColumnSchema col_bool("a", BOOL);
+    bool orig = true;
+    EXPECT_EQ(key_util::TryDecrementCell(col_bool, &orig), true);
+    EXPECT_EQ(orig, false);
+  }
+  {
+    ColumnSchema col_bool("a", BOOL);
+    int32_t orig = false;
+    EXPECT_EQ(key_util::TryDecrementCell(col_bool, &orig), false);
+    EXPECT_EQ(orig, false);
+  }
+  {
+    ColumnSchema col_str("a", STRING);
+    Slice orig("abc\0", 4);
+    EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), true);
+    EXPECT_EQ(orig, Slice("abc"));
+  }
+  {
+    ColumnSchema col_str("a", STRING);
+    Slice orig("abc");
+    EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), false);
+    EXPECT_EQ(orig, Slice("abc"));
+  }
+  {
+    ColumnSchema col_str("a", STRING);
+    Slice orig("");
+    EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), false);
+    EXPECT_EQ(orig, Slice(""));
+  }
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util.cc b/src/kudu/common/key_util.cc
index 8ee6eb3..451cadd 100644
--- a/src/kudu/common/key_util.cc
+++ b/src/kudu/common/key_util.cc
@@ -55,6 +55,17 @@ bool IncrementBoolCell(void* cell_ptr) {
   }
 }
 
+bool DecrementBoolCell(void* cell_ptr) {
+  bool orig;
+  memcpy(&orig, cell_ptr, sizeof(bool));
+  if (orig) {
+    bool dec = false;
+    memcpy(cell_ptr, &dec, sizeof(bool));
+    return true;
+  }
+  return false;
+}
+
 template<DataType type>
 bool IncrementIntCell(void* cell_ptr) {
   typedef DataTypeTraits<type> traits;
@@ -80,6 +91,33 @@ bool IncrementIntCell(void* cell_ptr) {
 }
 
 template<DataType type>
+bool DecrementIntCell(void* cell_ptr) {
+  typedef DataTypeTraits<type> traits;
+  typedef typename traits::cpp_type cpp_type;
+
+  cpp_type orig;
+  memcpy(&orig, cell_ptr, sizeof(cpp_type));
+
+  cpp_type dec;
+  if (std::is_unsigned<cpp_type>::value) {
+    dec = orig - 1;
+  } else {
+    // Signed overflow is undefined in C. So, we'll use a branch here
+    // instead of counting on undefined behavior.
+    if (orig == MathLimits<cpp_type>::kMin) {
+      dec = MathLimits<cpp_type>::kMax;
+    } else {
+      dec = orig - 1;
+    }
+  }
+  if (dec < orig) {
+    memcpy(cell_ptr, &dec, sizeof(cpp_type));
+    return true;
+  }
+  return false;
+}
+
+template<DataType type>
 bool IncrementFloatingPointCell(void* cell_ptr) {
   typedef DataTypeTraits<type> traits;
   typedef typename traits::cpp_type cpp_type;
@@ -91,6 +129,20 @@ bool IncrementFloatingPointCell(void* cell_ptr) {
   return inc != orig;
 }
 
+template<DataType type>
+bool DecrementFloatingPointCell(void* cell_ptr) {
+  typedef DataTypeTraits<type> traits;
+  typedef typename traits::cpp_type cpp_type;
+
+  cpp_type orig;
+  memcpy(&orig, cell_ptr, sizeof(cpp_type));
+  cpp_type dec = nextafter(orig, -numeric_limits<cpp_type>::infinity());
+  if (dec != orig) {
+    memcpy(cell_ptr, &dec, sizeof(cpp_type));
+  }
+  return false;
+}
+
 bool IncrementStringCell(void* cell_ptr, Arena* arena) {
   Slice orig;
   memcpy(&orig, cell_ptr, sizeof(orig));
@@ -104,6 +156,17 @@ bool IncrementStringCell(void* cell_ptr, Arena* arena) {
   return true;
 }
 
+bool DecrementStringCell(void* cell_ptr) {
+  Slice orig;
+  memcpy(&orig, cell_ptr, sizeof(orig));
+  if (orig.size() == 0 || orig[orig.size() - 1] != '\0') {
+    return false;
+  }
+  orig.truncate(orig.size() - 1);
+  memcpy(cell_ptr, &orig, sizeof(orig));
+  return true;
+}
+
 template<typename ColIdxIter>
 void SetKeyToMinValues(ColIdxIter first, ColIdxIter last, ContiguousRow* row) {
   for (auto col_idx_it = first; col_idx_it != last; std::advance(col_idx_it, 1)) {
@@ -140,13 +203,11 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
 
   const Schema& schema = *CHECK_NOTNULL(row->schema());
   int pushed_predicates = 0;
-  // Tracks whether the last pushed predicate is an equality or InList predicate.
-  const ColumnPredicate* final_predicate = nullptr;
-
-  // Step 1: copy predicates into the row in key column order, stopping after
-  // the first range or missing predicate.
 
+  // Step 1: copy predicates into the row in key column order, stopping after the first missing
+  // predicate or range predicate which can't be transformed to an inclusive upper bound.
   bool break_loop = false;
+  bool is_inclusive_bound = true;
   for (auto col_idx_it = first; !break_loop && col_idx_it < last; std::advance(col_idx_it,
1)) {
     const ColumnSchema& column = schema.column(*col_idx_it);
     const ColumnPredicate* predicate = FindOrNull(predicates, column.name());
@@ -156,19 +217,20 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
       case PredicateType::Equality:
         memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
         pushed_predicates++;
-        final_predicate = predicate;
         break;
       case PredicateType::Range:
         if (predicate->raw_upper() != nullptr) {
           memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_upper(), size);
           pushed_predicates++;
-          final_predicate = predicate;
+          // Try to decrease because upper bound is exclusive.
+          if (!TryDecrementCell(row->schema()->column(*col_idx_it),
+                                row->mutable_cell_ptr(*col_idx_it))) {
+            is_inclusive_bound = false;
+            break_loop = true;
+          }
+        } else {
+          break_loop = true;
         }
-        // After the first column with a range constraint we stop pushing
-        // constraints into the upper bound. Instead, we push minimum values
-        // to the remaining columns (below), which is the maximally tight
-        // constraint.
-        break_loop = true;
         break;
       case PredicateType::IsNotNull:
         break_loop = true;
@@ -179,7 +241,6 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
         DCHECK(!predicate->raw_values().empty());
         memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_values().back(),
size);
         pushed_predicates++;
-        final_predicate = predicate;
         break;
       case PredicateType::None:
         LOG(FATAL) << "NONE predicate can not be pushed into key";
@@ -189,10 +250,8 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
   // If no predicates were pushed, no need to do any more work.
   if (pushed_predicates == 0) { return 0; }
 
-  // Step 2: If the final predicate is an equality predicate or an InList predicate,
-  // increment the key to convert it to an exclusive upper bound.
-  if (final_predicate->predicate_type() == PredicateType::Equality
-      || final_predicate->predicate_type() == PredicateType::InList) {
+  // Step 2: If the upper bound is inclusive, increment it to become exclusive.
+  if (is_inclusive_bound) {
     if (!IncrementKey(first, std::next(first, pushed_predicates), row, arena)) {
       // If the increment fails then this bound is is not constraining the keyspace.
       return 0;
@@ -295,6 +354,35 @@ bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena*
arena) {
 #undef HANDLE_TYPE
 }
 
+bool TryDecrementCell(const ColumnSchema &col, void *cell_ptr) {
+  DataType type = col.type_info()->physical_type();
+  switch (type) {
+    case BOOL:
+      return DecrementBoolCell(cell_ptr);
+#define HANDLE_TYPE(t) case t: return DecrementIntCell<t>(cell_ptr);
+    HANDLE_TYPE(UINT8);
+    HANDLE_TYPE(UINT16);
+    HANDLE_TYPE(UINT32);
+    HANDLE_TYPE(UINT64);
+    HANDLE_TYPE(INT8);
+    HANDLE_TYPE(INT16);
+    HANDLE_TYPE(INT32);
+    HANDLE_TYPE(UNIXTIME_MICROS);
+    HANDLE_TYPE(INT64);
+    case FLOAT:
+      return DecrementFloatingPointCell<FLOAT>(cell_ptr);
+    case DOUBLE:
+      return DecrementFloatingPointCell<DOUBLE>(cell_ptr);
+    case STRING:
+    case BINARY:
+      return DecrementStringCell(cell_ptr);
+    case UNKNOWN_DATA:
+    default: LOG(FATAL) << "Unknown data type: " << type;
+  }
+  return false; // unreachable
+#undef HANDLE_TYPE
+}
+
 int PushLowerBoundKeyPredicates(const vector<int32_t>& col_idxs,
                                 const unordered_map<string, ColumnPredicate>& predicates,
                                 ContiguousRow* row,

http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util.h b/src/kudu/common/key_util.h
index 8dc60f4..70da7c6 100644
--- a/src/kudu/common/key_util.h
+++ b/src/kudu/common/key_util.h
@@ -60,6 +60,9 @@ bool IncrementPrimaryKey(ContiguousRow* row, Arena* arena) WARN_UNUSED_RESULT;
 // Increments the provided cell in place.
 bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena);
 
+// Decrements the provided cell. If fail, the result will not be updated.
+bool TryDecrementCell(const ColumnSchema &col, void *cell_ptr);
+
 // Pushes lower bound key predicates into the row. Returns the number of pushed
 // predicates. Unpushed predicate columns will be set to the minimum value
 // (unless no predicates are pushed at all).

http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/scan_spec-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/scan_spec-test.cc b/src/kudu/common/scan_spec-test.cc
index bf40be7..7798854 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -46,7 +46,8 @@ class TestScanSpec : public KuduTest {
   enum ComparisonOp {
     GE,
     EQ,
-    LE
+    LE,
+    LT
   };
 
   template<class T>
@@ -68,7 +69,10 @@ class TestScanSpec : public KuduTest {
         auto p = ColumnPredicate::InclusiveRange(schema_.column(idx), nullptr, val_void,
&arena_);
         if (p) spec->AddPredicate(*p);
         break;
-      };
+      }
+      case LT:
+        spec->AddPredicate(ColumnPredicate::Range(schema_.column(idx), nullptr, val_void));
+        break;
     }
   }
 
@@ -193,7 +197,7 @@ TEST_F(CompositeIntKeysTest, TestConsecutiveUpperRangePredicates) {
   AddPredicate<int8_t>(&spec, "c", LE, 5);
   SCOPED_TRACE(spec.ToString(schema_));
   spec.OptimizeScan(schema_, &arena_, &pool_, true);
-  EXPECT_EQ("PK < (int8 a=4, int8 b=-128, int8 c=-128) AND `b` < 5 AND `c` < 6",
+  EXPECT_EQ("PK < (int8 a=3, int8 b=4, int8 c=6) AND `b` < 5 AND `c` < 6",
             spec.ToString(schema_));
 }
 
@@ -221,7 +225,7 @@ TEST_F(CompositeIntKeysTest, TestEqualityAndConsecutiveRangePredicates)
{
   SCOPED_TRACE(spec.ToString(schema_));
   spec.OptimizeScan(schema_, &arena_, &pool_, true);
   EXPECT_EQ("PK >= (int8 a=3, int8 b=4, int8 c=5) AND "
-            "PK < (int8 a=3, int8 b=15, int8 c=-128) AND "
+            "PK < (int8 a=3, int8 b=14, int8 c=16) AND "
             "`c` >= 5 AND `c` < 16", spec.ToString(schema_));
 }
 
@@ -362,7 +366,7 @@ TEST_F(CompositeIntKeysTest, TestInListPushdownWithRange) {
   SCOPED_TRACE(spec.ToString(schema_));
   spec.OptimizeScan(schema_, &arena_, &pool_, true);
   EXPECT_EQ("PK >= (int8 a=10, int8 b=50, int8 c=-128) AND "
-            "PK < (int8 a=101, int8 b=-128, int8 c=-128) AND "
+            "PK < (int8 a=100, int8 b=101, int8 c=-128) AND "
             "`b` IN (50, 100)",
             spec.ToString(schema_));
 
@@ -694,6 +698,53 @@ TEST_F(CompositeIntStringKeysTest, TestPrefixEqualityWithString) {
             spec.ToString(schema_));
 }
 
+TEST_F(CompositeIntStringKeysTest, TestDecreaseUpperBoundKey) {
+  {
+    ScanSpec spec;
+    AddPredicate<int8_t>(&spec, "a", LT, 64);
+    AddPredicate<Slice>(&spec, "b", LT, Slice("abc"));
+    AddPredicate<Slice>(&spec, "c", LT, Slice("def\0", 4));
+    SCOPED_TRACE(spec.ToString(schema_));
+    spec.OptimizeScan(schema_, &arena_, &pool_, true);
+    EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="") AND )"
+              R"(`b` < "abc" AND `c` < "def\000")",
+              spec.ToString(schema_));
+  }
+  {
+    ScanSpec spec;
+    AddPredicate<int8_t>(&spec, "a", LT, 64);
+    AddPredicate<Slice>(&spec, "b", LT, Slice("abc\0", 4));
+    AddPredicate<Slice>(&spec, "c", LT, Slice("def"));
+    SCOPED_TRACE(spec.ToString(schema_));
+    spec.OptimizeScan(schema_, &arena_, &pool_, true);
+    EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="def") AND )"
+              R"(`b` < "abc\000" AND `c` < "def")",
+              spec.ToString(schema_));
+  }
+  {
+    ScanSpec spec;
+    AddPredicate<int8_t>(&spec, "a", LT, 64);
+    AddPredicate<Slice>(&spec, "b", LT, Slice("abc\0", 4));
+    AddPredicate<Slice>(&spec, "c", LT, Slice("def\0", 4));
+    SCOPED_TRACE(spec.ToString(schema_));
+    spec.OptimizeScan(schema_, &arena_, &pool_, true);
+    EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="def\000") AND )"
+              R"(`b` < "abc\000" AND `c` < "def\000")",
+              spec.ToString(schema_));
+  }
+  {
+    ScanSpec spec;
+    AddPredicate<int8_t>(&spec, "a", LT, 64);
+    AddPredicate<Slice>(&spec, "b", LT, Slice("abc"));
+    AddPredicate<Slice>(&spec, "c", LT, Slice("def"));
+    SCOPED_TRACE(spec.ToString(schema_));
+    spec.OptimizeScan(schema_, &arena_, &pool_, true);
+    EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="") AND )"
+              R"(`b` < "abc" AND `c` < "def")",
+              spec.ToString(schema_));
+  }
+}
+
 // Tests for non-composite int key
 //------------------------------------------------------------
 class SingleIntKeyTest : public TestScanSpec {


Mime
View raw message