kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject [6/8] kudu git commit: KUDU-1651: Check empty dict before decoder eval
Date Mon, 03 Oct 2016 21:50:40 GMT
KUDU-1651: Check empty dict before decoder eval

A completely null dictionary-encoded column will yield an empty
dictionary. When decoder-level evaluation is on, this will result in
an error when creating the bitmap of matching codewords, as bitmaps
must have a non-zero size.

In this scenario, it is sufficient to not determine the set of
matching codewords. The decoder should be completely skipped over
given a null column, so the matching codeword set would not be used
here. If the decoder is not skipped over and an attempt is made to
read from the dictionary block, an error should be thrown.

Change-Id: I76d457c65238b3b2283d826076fdc7525d032e3f
Reviewed-on: http://gerrit.cloudera.org:8080/4537
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <dan@cloudera.com>
(cherry picked from commit 1f40913de6d3427847f3d435e1b84fb928a6f6a9)
Reviewed-on: http://gerrit.cloudera.org:8080/4606
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/298aff77
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/298aff77
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/298aff77

Branch: refs/heads/branch-1.0.x
Commit: 298aff7709b23a7fe7ab793b0dceaf7f397bd64e
Parents: 2f12037
Author: Andrew Wong <anjuwong@g.ucla.edu>
Authored: Sun Sep 25 21:10:17 2016 -0700
Committer: Dan Burkert <dan@cloudera.com>
Committed: Mon Oct 3 21:45:20 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/binary_dict_block.cc         |  1 +
 src/kudu/cfile/cfile_reader.cc              | 15 ++++++++-------
 src/kudu/tablet/tablet-decoder-eval-test.cc | 13 ++++++++++---
 3 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/cfile/binary_dict_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/binary_dict_block.cc b/src/kudu/cfile/binary_dict_block.cc
index 478b1cc..30d571a 100644
--- a/src/kudu/cfile/binary_dict_block.cc
+++ b/src/kudu/cfile/binary_dict_block.cc
@@ -264,6 +264,7 @@ Status BinaryDictBlockDecoder::CopyNextAndEval(size_t* n,
 
   // Predicates that have no matching words should return no data.
   SelectionVector* codewords_matching_pred = parent_cfile_iter_->GetCodeWordsMatchingPredicate();
+  CHECK(codewords_matching_pred != nullptr);
   if (!codewords_matching_pred->AnySelected()) {
     // If nothing is selected, move the data_decoder_ pointer forward and clear
     // the corresponding bits in the selection vector.

http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index 67999f5..8b2d42c 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -906,12 +906,14 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) {
   // yet been determined for this CFile.
   if (dict_decoder_ && ctx->DecoderEvalNotDisabled() && !codewords_matching_pred_)
{
     size_t nwords = dict_decoder_->Count();
-    codewords_matching_pred_.reset(new SelectionVector(nwords));
-    codewords_matching_pred_->SetAllFalse();
-    for (size_t i = 0; i < nwords; i++) {
-      Slice cur_string = dict_decoder_->string_at_index(i);
-      if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string)))
{
-        BitmapSet(codewords_matching_pred_->mutable_bitmap(), i);
+    if (nwords > 0) {
+      codewords_matching_pred_.reset(new SelectionVector(nwords));
+      codewords_matching_pred_->SetAllFalse();
+      for (size_t i = 0; i < nwords; i++) {
+        Slice cur_string = dict_decoder_->string_at_index(i);
+        if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string)))
{
+          BitmapSet(codewords_matching_pred_->mutable_bitmap(), i);
+        }
       }
     }
   }
@@ -940,7 +942,6 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) {
         }
         size_t this_batch = nblock;
         if (not_null) {
-          // TODO: Maybe copy all and shift later?
           if (ctx->DecoderEvalNotDisabled()) {
             RETURN_NOT_OK(pb->dblk_->CopyNextAndEval(&this_batch,
                                                      ctx,

http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/tablet/tablet-decoder-eval-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-decoder-eval-test.cc b/src/kudu/tablet/tablet-decoder-eval-test.cc
index e35f416..6aba34b 100644
--- a/src/kudu/tablet/tablet-decoder-eval-test.cc
+++ b/src/kudu/tablet/tablet-decoder-eval-test.cc
@@ -31,9 +31,9 @@ using strings::Substitute;
 
 enum Setup {
 #ifdef ADDRESS_SANITIZER
-  SMALL = 100, MEDIUM = 3000, LARGE = 10000
+  EMPTY = 0, SMALL = 100, MEDIUM = 3000, LARGE = 10000
 #else
-  SMALL = 100, MEDIUM = 5000, LARGE = 100000
+  EMPTY = 0, SMALL = 100, MEDIUM = 5000, LARGE = 100000
 #endif
 };
 
@@ -287,6 +287,12 @@ TEST_P(TabletDecoderEvalTest, NullableHighCardinality) {
   TestNullableScanAndFilter(50000, 30, 200, 75);
 }
 
+TEST_P(TabletDecoderEvalTest, CompletelyNullColumn) {
+  // Fill a tablet with pattern [0, 50) but with all values being NULL.
+  // Query for values [30, 50).
+  TestNullableScanAndFilter(50, 30, 50, 50);
+}
+
 TEST_P(TabletDecoderEvalTest, MultipleColumns) {
   // Fill a tablet with pattern [0, 10) and query a:[0, 5) AND b:[3, 10).
   // To be considered correct, returned columns must align as they do in the
@@ -294,7 +300,8 @@ TEST_P(TabletDecoderEvalTest, MultipleColumns) {
   TestMultipleColumnPredicates(10, 3, 5);
 }
 
-INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(SMALL,
+INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(EMPTY,
+                                                                                    SMALL,
                                                                                     MEDIUM,
                                                                                     LARGE));
 


Mime
View raw message