impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tarmstr...@apache.org
Subject incubator-impala git commit: IMPALA-4049: fix empty batch handling NLJ build side
Date Wed, 31 Aug 2016 21:29:51 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/master 5adedc6a1 -> 1350c3476


IMPALA-4049: fix empty batch handling NLJ build side

Memory from the build side of a nested loop join is
referenced by its output batches, so accumulated memory
build side resources must be transferred to the caller.
Special-cased handling of empty batches did not transfer
the memory. The fix is to accumulate empty batches and
transfer their resources in the same way as non-empty
batches. The iterator required changes to handle empty
batches in the list.

Testing:
Added a unit test that exercises the bug RowBatchList.
Add a query test that causes a crash in the ASAN build
and incorrect results in the debug build.

Change-Id: I3cb19e536b87bbb4d4ae82d1636ba1463a422789
Reviewed-on: http://gerrit.cloudera.org:8080/4182
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1350c347
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1350c347
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1350c347

Branch: refs/heads/master
Commit: 1350c34763d52f703ad1e870abff64fae46d20fa
Parents: 5adedc6
Author: Tim Armstrong <tarmstrong@cloudera.com>
Authored: Tue Aug 30 20:22:54 2016 -0700
Committer: Internal Jenkins <cloudera-hudson@gerrit.cloudera.org>
Committed: Wed Aug 31 21:20:29 2016 +0000

----------------------------------------------------------------------
 be/src/exec/row-batch-list-test.cc              | 15 ++++++++--
 be/src/exec/row-batch-list.h                    | 18 ++++++++++--
 .../queries/QueryTest/nested-types-tpch.test    | 30 ++++++++++++++++++++
 3 files changed, 58 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1350c347/be/src/exec/row-batch-list-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/row-batch-list-test.cc b/be/src/exec/row-batch-list-test.cc
index 250734a..7d36a58 100644
--- a/be/src/exec/row-batch-list-test.cc
+++ b/be/src/exec/row-batch-list-test.cc
@@ -104,12 +104,23 @@ TEST_F(RowBatchListTest, BasicTest) {
 
 // This tests an empty batch is handled correctly.
 TEST_F(RowBatchListTest, EmptyBatchTest) {
+  const int ALLOC_SIZE = 128;
   RowBatchList row_list;
-  RowBatch* batch = pool_.Add(new RowBatch(*desc_, 1, &tracker_));
-  row_list.AddRowBatch(batch);
+  RowBatch* batch1 = pool_.Add(new RowBatch(*desc_, 1, &tracker_));
+  batch1->tuple_data_pool()->Allocate(ALLOC_SIZE);
+  DCHECK_EQ(ALLOC_SIZE, batch1->tuple_data_pool()->total_allocated_bytes());
+
+  row_list.AddRowBatch(batch1);
   EXPECT_EQ(row_list.total_num_rows(), 0);
   RowBatchList::TupleRowIterator it = row_list.Iterator();
   EXPECT_TRUE(it.AtEnd());
+
+  // IMPALA-4049: list should transfer resources attached to empty batch.
+  RowBatch* batch2 = pool_.Add(new RowBatch(*desc_, 1, &tracker_));
+  DCHECK_EQ(0, batch2->tuple_data_pool()->total_allocated_bytes());
+  row_list.TransferResourceOwnership(batch2);
+  DCHECK_EQ(0, batch1->tuple_data_pool()->total_allocated_bytes());
+  DCHECK_EQ(ALLOC_SIZE, batch2->tuple_data_pool()->total_allocated_bytes());
 }
 
 // This tests inserts 100 row batches of 1024 rows each to list.  It validates that they

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1350c347/be/src/exec/row-batch-list.h
----------------------------------------------------------------------
diff --git a/be/src/exec/row-batch-list.h b/be/src/exec/row-batch-list.h
index 3879806..18f2490 100644
--- a/be/src/exec/row-batch-list.h
+++ b/be/src/exec/row-batch-list.h
@@ -58,12 +58,15 @@ class RowBatchList {
       return (*batch_it_)->GetRow(row_idx_);
     }
 
-    /// Increments the iterator. No-op if the iterator is at the end.
+    /// Moves the iterator to the next row. If the current row is the last row in the
+    /// batch, advances to either the next non-empty batch or the end. No-op if the
+    /// iterator is already at the end.
     void Next() {
-      if (batch_it_ == list_->row_batches_.end()) return;
+      if (AtEnd()) return;
       DCHECK_GE((*batch_it_)->num_rows(), 0);
       if (++row_idx_ == (*batch_it_)->num_rows()) {
         ++batch_it_;
+        SkipEmptyBatches();
         row_idx_ = 0;
       }
     }
@@ -75,17 +78,26 @@ class RowBatchList {
       : list_(list),
         batch_it_(list->row_batches_.begin()),
         row_idx_(0) {
+      SkipEmptyBatches();
+    }
+
+    void SkipEmptyBatches() {
+      while (!AtEnd() && (*batch_it_)->num_rows() == 0) ++batch_it_;
     }
 
     RowBatchList* list_;
+
+    /// The current batch. Either a batch with > 0 rows or the end() iterator.
     BatchIterator batch_it_;
+
+    /// The index of the current row in the current batch. Always the index of a valid
+    /// row if 'batch_it_' points to a valid batch.
     int64_t row_idx_;
   };
 
   /// Add the 'row_batch' to the list. The RowBatch* and all of its resources are owned
   /// by the caller.
   void AddRowBatch(RowBatch* row_batch) {
-    if (row_batch->num_rows() == 0) return;
     row_batches_.push_back(row_batch);
     total_num_rows_ += row_batch->num_rows();
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1350c347/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
index 8f15427..5ace4f4 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
@@ -203,3 +203,33 @@ where c_phone='20-968-632-1388' and l_partkey = 127499
 ---- TYPES
 bigint,string,string,smallint,string,decimal,string,string,bigint,string,decimal,string,string,string,int,string,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string
 ====
+---- QUERY
+# IMPALA-4049: non-grouping aggregation with selective predicate in subplan feeding into
+# build side of a nested loop join. Reproduces a memory transfer bug triggered by empty
+# row batches in the build side of the join.
+select straight_join c_custkey, cnt1
+from tpch_nested_parquet.customer c,
+  (select count(*) cnt1 from c.c_orders) v
+where cnt1 = 1
+order by c_custkey
+---- RESULTS
+1910,1
+2855,1
+9938,1
+14996,1
+17480,1
+25622,1
+42239,1
+43360,1
+48365,1
+52973,1
+67328,1
+86840,1
+87212,1
+131732,1
+138173,1
+140732,1
+148949,1
+---- TYPES
+bigint, bigint
+====


Mime
View raw message