hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject [2/2] hive git commit: HIVE-16260: Remove parallel edges of semijoin with map joins. (Deepak Jaiswal via by Jason Dere)
Date Thu, 23 Mar 2017 10:16:13 GMT
HIVE-16260: Remove parallel edges of semijoin with map joins. (Deepak Jaiswal via by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24f1861e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24f1861e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24f1861e

Branch: refs/heads/master
Commit: 24f1861e80685769209d3a010bf93e98a1ac1be2
Parents: 112cbd1
Author: Jason Dere <jdere@hortonworks.com>
Authored: Thu Mar 23 03:14:57 2017 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Thu Mar 23 03:14:57 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   6 +
 .../DynamicPartitionPruningOptimization.java    |   4 +
 .../hadoop/hive/ql/parse/TezCompiler.java       | 130 +++++-
 .../ql/udf/generic/GenericUDAFBloomFilter.java  |  20 +
 .../clientpositive/dynamic_partition_pruning.q  |   3 +-
 .../clientpositive/dynamic_semijoin_reduction.q |  13 +-
 .../dynamic_semijoin_reduction_2.q              |   2 +
 .../dynamic_semijoin_reduction_3.q              |   2 +
 ql/src/test/queries/clientpositive/mergejoin.q  |   2 +
 .../vectorized_dynamic_partition_pruning.q      |   3 +-
 .../vectorized_dynamic_semijoin_reduction.q     |   2 +
 .../vectorized_dynamic_semijoin_reduction2.q    |   2 +
 .../llap/dynamic_partition_pruning.q.out        |   4 +-
 .../llap/dynamic_semijoin_reduction.q.out       | 422 ++++++++++++++-----
 .../llap/dynamic_semijoin_reduction_2.q.out     |   4 +-
 .../llap/dynamic_semijoin_reduction_3.q.out     |  14 +-
 .../results/clientpositive/llap/mergejoin.q.out |  56 +--
 .../vectorized_dynamic_partition_pruning.q.out  |   4 +-
 .../vectorized_dynamic_semijoin_reduction.q.out |  36 +-
 ...vectorized_dynamic_semijoin_reduction2.q.out |  32 +-
 .../clientpositive/tez/explainanalyze_3.q.out   | 127 ++----
 .../clientpositive/tez/explainuser_3.q.out      | 128 ++----
 22 files changed, 639 insertions(+), 377 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d4a0b2e..1bc3a6e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2849,8 +2849,14 @@ public class HiveConf extends Configuration {
     TEZ_DYNAMIC_SEMIJOIN_REDUCTION("hive.tez.dynamic.semijoin.reduction", true,
         "When dynamic semijoin is enabled, shuffle joins will perform a leaky semijoin before shuffle. This " +
         "requires hive.tez.dynamic.partition.pruning to be enabled."),
+    TEZ_MIN_BLOOM_FILTER_ENTRIES("hive.tez.min.bloom.filter.entries", 1000000L,
+            "Bloom filter should be of at min certain size to be effective"),
     TEZ_MAX_BLOOM_FILTER_ENTRIES("hive.tez.max.bloom.filter.entries", 100000000L,
             "Bloom filter should be of at max certain size to be effective"),
+    TEZ_BLOOM_FILTER_FACTOR("hive.tez.bloom.filter.factor", (float) 2.0,
+            "Bloom filter should be a multiple of this factor with nDV"),
+    TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION("hive.tez.bigtable.minsize.semijoin.reduction", 1000000L,
+            "Big table for runtime filteting should be of atleast this size"),
     TEZ_SMB_NUMBER_WAVES(
         "hive.tez.smb.number.waves",
         (float) 0.5,

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index c513ee5..727f7bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -507,6 +507,8 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
       bloomFilterEval.setSourceOperator(selectOp);
       bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
+      bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
+      bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
       bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
       aggs.add(min);
       aggs.add(max);
@@ -603,6 +605,8 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
       bloomFilterEval.setSourceOperator(selectOp);
       bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
+      bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
+      bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
       bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
 
       aggsFinal.add(min);

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 47b229f..468e18e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -102,10 +102,9 @@ public class TezCompiler extends TaskCompiler {
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");
 
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
-    // after the stats phase we might have some cyclic dependencies that we need
-    // to take care of.
-    runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
-    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+    // Remove any parallel edge between semijoin and mapjoin.
+    removeSemijoinsParallelToMapJoin(procCtx);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");
 
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
     // Remove semijoin optimization if it creates a cycle with mapside joins
@@ -122,6 +121,12 @@ public class TezCompiler extends TaskCompiler {
     removeSemiJoinIfNoStats(procCtx);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
 
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    // after the stats phase we might have some cyclic dependencies that we need
+    // to take care of.
+    runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+
     // need a new run of the constant folding because we might have created lots
     // of "and true and true" conditions.
     // Rather than run the full constant folding just need to shortcut AND/OR expressions
@@ -781,6 +786,16 @@ public class TezCompiler extends TaskCompiler {
           break;
         }
       }
+
+      // Check if big table is big enough that runtime filtering is
+      // worth it.
+      if (ts.getStatistics() != null) {
+        long numRows = ts.getStatistics().getNumRows();
+        if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
+          removeSemiJoin = true;
+        }
+      }
+
       if (removeSemiJoin) {
         // The stats are not annotated, remove the semijoin operator
         GenTezUtils.removeBranch(rs);
@@ -810,4 +825,111 @@ public class TezCompiler extends TaskCompiler {
     GraphWalker ogw = new PreOrderOnceWalker(disp);
     ogw.startWalking(topNodes, null);
   }
+
+  private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
+                                             ParseContext parseContext,
+                                             Map<ReduceSinkOperator, TableScanOperator> semijoins) {
+
+    boolean parallelEdges = false;
+    for (Operator<?> op : mapjoin.getParentOperators()) {
+      if (!(op instanceof ReduceSinkOperator)) {
+        continue;
+      }
+
+      op = op.getParentOperators().get(0);
+
+      // Follow the Reducesink operator upstream which is on small table side.
+      while (!(op instanceof ReduceSinkOperator) &&
+              !(op instanceof TableScanOperator) &&
+              !(op.getChildren() != null && op.getChildren().size() > 1)) {
+        op = op.getParentOperators().get(0);
+      }
+
+      // Bail out if RS or TS is encountered.
+      if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
+        continue;
+      }
+
+      // A branch is hit.
+      for (Node nd : op.getChildren()) {
+        if (nd instanceof SelectOperator) {
+          Operator<?> child = (Operator<?>) nd;
+
+          while (child.getChildOperators().size() > 0) {
+            child = child.getChildOperators().get(0);
+          }
+
+          // If not ReduceSink Op, skip
+          if (!(child instanceof ReduceSinkOperator)) {
+            continue;
+          }
+
+          ReduceSinkOperator rs = (ReduceSinkOperator) child;
+          TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs);
+          if (ts == null || ts != bigTableTS) {
+            // skip, no semijoin or not the one we are looking for.
+            continue;
+          }
+
+          // Add the semijoin branch to the map
+          semijoins.put(rs, ts);
+          parallelEdges = true;
+        }
+      }
+    }
+    return parallelEdges;
+  }
+
+  /*
+   *  The algorithm looks at all the mapjoins in the operator pipeline until
+   *  it hits RS Op and for each mapjoin examines if it has parallel semijoin
+   *  edge.
+   */
+  private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx)
+          throws SemanticException {
+    if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
+            !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN)) {
+      // Not needed without semi-join reduction
+      return;
+    }
+
+    // Get all the TS ops.
+    List<Operator<?>> topOps = new ArrayList<>();
+    topOps.addAll(procCtx.parseContext.getTopOps().values());
+
+    Map<ReduceSinkOperator, TableScanOperator> semijoins = new HashMap<>();
+    for (Operator<?> parent : topOps) {
+      // A TS can have multiple branches due to DPP Or Semijoin Opt.
+      // Use DFS to traverse all the branches until RS is hit.
+      Deque<Operator<?>> deque = new LinkedList<>();
+      deque.add(parent);
+      while (!deque.isEmpty()) {
+        Operator<?> op = deque.poll();
+        if (op instanceof ReduceSinkOperator) {
+          // Done with this branch
+          continue;
+        }
+
+        if (op instanceof MapJoinOperator) {
+          // A candidate.
+          if (!findParallelSemiJoinBranch(op, (TableScanOperator) parent,
+                  procCtx.parseContext, semijoins)) {
+            // No parallel edge was found for the given mapjoin op,
+            // no need to go down further, skip this TS operator pipeline.
+            break;
+          }
+        }
+        deque.addAll(op.getChildOperators());
+      }
+    }
+
+    if (semijoins.size() > 0) {
+      for (ReduceSinkOperator rs : semijoins.keySet()) {
+        GenTezUtils.removeBranch(rs);
+        GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs,
+                semijoins.get(rs));
+      }
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index 788aace..2b84beb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -73,6 +73,8 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
     // Source operator to get the number of entries
     private SelectOperator sourceOperator;
     private long maxEntries = 0;
+    private long minEntries = 0;
+    private float factor = 1;
 
     // ObjectInspector for input data.
     private PrimitiveObjectInspector inputOI;
@@ -278,6 +280,9 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
         }
       }
 
+      // Update expectedEntries based on factor and minEntries configurations
+      expectedEntries = (long) (expectedEntries * factor);
+      expectedEntries = expectedEntries > minEntries ? expectedEntries : minEntries;
       return expectedEntries;
     }
 
@@ -293,6 +298,21 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
       this.maxEntries = maxEntries;
     }
 
+    public void setMinEntries(long minEntries) {
+      this.minEntries = minEntries;
+    }
+
+    public long getMinEntries() {
+      return minEntries;
+    }
+
+    public void setFactor(float factor) {
+      this.factor = factor;
+    }
+
+    public float getFactor() {
+      return factor;
+    }
     @Override
     public String getExprString() {
       return "expectedEntries=" + getExpectedEntries();

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
index d28da6e..9145a36 100644
--- a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
@@ -6,7 +6,8 @@ set hive.ppd.remove.duplicatefilters=true;
 set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
-
+set hive.tez.min.bloom.filter.entries=1;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
index 5482cdb..f04a923 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
@@ -8,6 +8,8 @@ set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 -- Create Tables
 create table alltypesorc_int ( cint int, cstring string ) stored as ORC;
@@ -67,7 +69,7 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp
 set hive.tez.dynamic.semijoin.reduction=true;
 EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
 select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
---set hive.tez.dynamic.semijoin.reduction=false;
+set hive.tez.dynamic.semijoin.reduction=false;
 
 -- With Mapjoins.
 set hive.auto.convert.join=true;
@@ -79,6 +81,15 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp
 set hive.tez.dynamic.semijoin.reduction=true;
 EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
 select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+-- multiple sources, different  keys
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+--set hive.tez.dynamic.semijoin.reduction=false;
 
 -- With unions
 explain select * from alltypesorc_int join

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
index 2306395..88386a6 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
@@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 CREATE TABLE `table_1`(
   `bigint_col_7` bigint,

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q
index 01650f8..d5fe136 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q
@@ -12,6 +12,8 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 -- Try with merge statements
 create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/mergejoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q
index eecd105..381f253 100644
--- a/ql/src/test/queries/clientpositive/mergejoin.q
+++ b/ql/src/test/queries/clientpositive/mergejoin.q
@@ -9,6 +9,8 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
 
 -- SORT_QUERY_RESULTS
 

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index d2ded71..cb0e279 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -8,7 +8,8 @@ set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
-
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q
index 68b57ea..f7c1f3b 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q
@@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 set hive.vectorized.adaptor.usage.mode=none;
 set hive.vectorized.execution.enabled=true;

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
index be8e4af..4bdff42 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
@@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
 
 set hive.vectorized.adaptor.usage.mode=none;
 set hive.vectorized.execution.enabled=true;

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 96d998f..e514e2e 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -3224,7 +3224,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
@@ -3270,7 +3270,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
index d32cb5c..235fed0 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
@@ -327,7 +327,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -375,7 +375,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -744,7 +744,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -757,7 +757,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -790,7 +790,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -840,7 +840,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -852,7 +852,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -864,7 +864,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1080,7 +1080,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1093,7 +1093,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1141,7 +1141,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1153,7 +1153,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1410,7 +1410,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1443,7 +1443,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1507,7 +1507,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1519,7 +1519,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1568,19 +1568,18 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string)
@@ -1624,19 +1623,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -1654,18 +1640,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
 
   Stage: Stage-0
     Fetch Operator
@@ -1705,19 +1679,18 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string)
@@ -1761,12 +1734,300 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+8224
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_int
+                  filterExpr: cstring is not null (type: boolean)
+                  Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: cstring is not null (type: boolean)
+                    Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cstring (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col1 (type: string)
+                        outputColumnNames: _col1
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
+                          input vertices:
+                            1 Map 4
+                          Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                            Reduce Output Operator
+                              sort order: 
+                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                              value expressions: _col0 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and value is not null) (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Map 3 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_int
+                  filterExpr: cstring is not null (type: boolean)
+                  Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: cstring is not null (type: boolean)
+                    Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cstring (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col1 (type: string)
+                        outputColumnNames: _col1
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
+                          input vertices:
+                            1 Map 4
+                          Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                            Reduce Output Operator
+                              sort order: 
+                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+                              value expressions: _col0 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                       Select Operator
                         expressions: _col0 (type: string)
                         outputColumnNames: _col0
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1791,11 +2052,11 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
+        Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1810,8 +2071,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
 PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
 PREHOOK: Input: default@srcpart_date
 PREHOOK: Input: default@srcpart_date@ds=2008-04-08
 PREHOOK: Input: default@srcpart_date@ds=2008-04-09
@@ -1819,8 +2081,9 @@ PREHOOK: Input: default@srcpart_small
 PREHOOK: Input: default@srcpart_small@ds=2008-04-08
 PREHOOK: Input: default@srcpart_small@ds=2008-04-09
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
 POSTHOOK: Input: default@srcpart_date
 POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
 POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
@@ -1828,7 +2091,7 @@ POSTHOOK: Input: default@srcpart_small
 POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
 #### A masked pattern was here ####
-8224
+0
 PREHOOK: query: explain select * from alltypesorc_int join
                                       (select srcpart_date.key as key from srcpart_date
                                        union all
@@ -1848,20 +2111,19 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 4 (BROADCAST_EDGE), Union 3 (BROADCAST_EDGE)
+        Map 1 <- Union 3 (BROADCAST_EDGE)
         Map 2 <- Union 3 (CONTAINS)
-        Map 5 <- Union 3 (CONTAINS)
-        Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE)
+        Map 4 <- Union 3 (CONTAINS)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc_int
-                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean)
+                  filterExpr: cstring is not null (type: boolean)
                   Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean)
+                    predicate: cstring is not null (type: boolean)
                     Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int), cstring (type: string)
@@ -1904,22 +2166,9 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_small
@@ -1937,33 +2186,8 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
         Union 3 
             Vertex: Union 3
 

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 5f75977..d291e7d 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -184,7 +184,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
@@ -230,7 +230,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
index c272fc1..8950b70 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
@@ -82,7 +82,7 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Group By Operator
-                      aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                      aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -235,7 +235,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -353,7 +353,7 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Group By Operator
-                      aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                      aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -406,7 +406,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -501,7 +501,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=4)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -551,7 +551,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=4)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -704,7 +704,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=4)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/24f1861e/ql/src/test/results/clientpositive/llap/mergejoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
index 186600b..c1fde0b 100644
--- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -95,7 +95,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -313,7 +313,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -381,7 +381,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1426,7 +1426,7 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                        aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
                         Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1493,7 +1493,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1586,7 +1586,7 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=500)
+                        aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
                         Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1634,7 +1634,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=500)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1823,7 +1823,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1876,7 +1876,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1940,7 +1940,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1952,7 +1952,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2026,7 +2026,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
@@ -2094,7 +2094,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
@@ -2216,7 +2216,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2267,7 +2267,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2313,7 +2313,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2372,7 +2372,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
@@ -2440,7 +2440,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
@@ -2516,7 +2516,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2569,7 +2569,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2633,7 +2633,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2645,7 +2645,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2769,7 +2769,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2820,7 +2820,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2866,7 +2866,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2973,7 +2973,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -3019,7 +3019,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE


Mime
View raw message