hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject hive git commit: HIVE-16804: Semijoin hint : Needs support for target table. (Deepak Jaiswal, reviewed by Jason Dere)
Date Thu, 08 Jun 2017 17:13:52 GMT
Repository: hive
Updated Branches:
  refs/heads/master 5861b6af5 -> 41f72dc3e


HIVE-16804: Semijoin hint : Needs support for target table. (Deepak Jaiswal, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41f72dc3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41f72dc3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41f72dc3

Branch: refs/heads/master
Commit: 41f72dc3eda0e2744ea3787560ef12ec1d994038
Parents: 5861b6a
Author: Jason Dere <jdere@hortonworks.com>
Authored: Thu Jun 8 10:11:35 2017 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Thu Jun 8 10:11:35 2017 -0700

----------------------------------------------------------------------
 .../DynamicPartitionPruningOptimization.java    |  5 ++
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 22 +++--
 .../hadoop/hive/ql/parse/SemiJoinHint.java      |  8 +-
 .../hadoop/hive/ql/parse/TezCompiler.java       | 16 ++--
 .../test/queries/clientpositive/semijoin_hint.q | 32 +++----
 .../clientpositive/llap/semijoin_hint.q.out     | 91 ++++++++------------
 6 files changed, 89 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index 8a62982..562caf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -226,6 +226,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor
{
               if (tabAliasBuilder.length() > 0) {
                 tableAlias = tabAliasBuilder.toString();
               } else {
+                //falling back
                 Operator<?> op = ctx.generator;
 
                 while (!(op == null || op instanceof TableScanOperator)) {
@@ -361,6 +362,10 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor
{
       if (!colName.equals(sjHint.getColName())) {
         continue;
       }
+      if (!ts.getConf().getAlias().equals(sjHint.getTarget())) {
+        continue;
+      }
+
       // match!
       LOG.info("Creating runtime filter due to user hint: column = " + colName);
       if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias,

http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d514644..9e84a29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9034,8 +9034,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
   /** Parses semjoin hints in the query and returns the table names mapped to filter size,
or -1 if not specified.
    *  Hints can be in 2 formats
-   *  1. TableName, ColumnName, bloom filter entries
-   *  2. TableName, ColumnName
+   *  1. TableName, ColumnName, Target-TableName, bloom filter entries
+   *  2. TableName, ColumnName, Target-TableName
    *  */
   private Map<String, List<SemiJoinHint>> parseSemiJoinHint(List<ASTNode>
hints) throws SemanticException {
     if (hints == null || hints.size() == 0) return null;
@@ -9071,15 +9071,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     throws SemanticException {
     // Check if there are enough entries in the tree to constitute a hint.
     int numEntriesLeft = args.getChildCount() - curIdx;
-    if (numEntriesLeft < 2) {
+    if (numEntriesLeft < 3) {
       throw new SemanticException("User provided only 1 entry for the hint with alias "
               + args.getChild(curIdx).getText());
     }
 
-    String alias = args.getChild(curIdx++).getText();
+    String source = args.getChild(curIdx++).getText();
     // validate
-    if (StringUtils.isNumeric(alias)) {
-      throw new SemanticException("User provided bloom filter entries when alias is expected");
+    if (StringUtils.isNumeric(source)) {
+      throw new SemanticException("User provided bloom filter entries when source alias is
expected");
     }
 
     String colName = args.getChild(curIdx++).getText();
@@ -9088,8 +9088,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       throw new SemanticException("User provided bloom filter entries when column name is
expected");
     }
 
+    String target = args.getChild(curIdx++).getText();
+    // validate
+    if (StringUtils.isNumeric(colName)) {
+      throw new SemanticException("User provided bloom filter entries when target alias is
expected");
+    }
+
     Integer number = null;
-    if (numEntriesLeft > 2) {
+    if (numEntriesLeft > 3) {
       // Check if there exists bloom filter size entry
       try {
         number = Integer.parseInt(args.getChild(curIdx).getText());
@@ -9097,7 +9103,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       } catch (NumberFormatException e) { // Ignore
       }
     }
-    result.computeIfAbsent(alias, value -> new ArrayList<>()).add(new SemiJoinHint(colName,
number));
+    result.computeIfAbsent(source, value -> new ArrayList<>()).add(new SemiJoinHint(colName,
target, number));
     return curIdx;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
index f7fd306..b2c123f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
@@ -20,17 +20,21 @@ package org.apache.hadoop.hive.ql.parse;
 
 public class SemiJoinHint {
   private String colName;
+  private String target;
   private Integer numEntries;
 
-  public SemiJoinHint(String colName, Integer numEntries) {
+  public SemiJoinHint(String colName, String target, Integer numEntries) {
     this.colName = colName;
+    this.target = target;
     this.numEntries = numEntries;
   }
 
   public String getColName() {
     return colName;
   }
-
+  public String getTarget() {
+    return target;
+  }
   public Integer getNumEntries() {
     return numEntries != null ? numEntries : -1;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 7e156f6..20f16fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -743,15 +743,15 @@ public class TezCompiler extends TaskCompiler {
         SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs);
         if (sjInfo != null && ts == sjInfo.getTsOp()) {
           // match!
+          if (sjInfo.getIsHint()) {
+            throw new SemanticException("Removing hinted semijoin as it is with SMB join
" + rs + " : " + ts);
+          }
           if (LOG.isDebugEnabled()) {
             LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin "
                     + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
           }
           GenTezUtils.removeBranch(rs);
           GenTezUtils.removeSemiJoinOperator(pctx, rs, ts);
-          if (sjInfo.getIsHint()) {
-            LOG.debug("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts);
-          }
         }
       }
     }
@@ -848,15 +848,15 @@ public class TezCompiler extends TaskCompiler {
 
           if (parent == ts) {
             // We have a cycle!
+            if (sjInfo.getIsHint()) {
+              throw new SemanticException("Removing hinted semijoin as it is creating cycles
with mapside joins " + rs + " : " + ts);
+            }
             if (LOG.isDebugEnabled()) {
               LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin "
                   + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
             }
             GenTezUtils.removeBranch(rs);
             GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
-            if (sjInfo.getIsHint()) {
-              LOG.debug("Removing hinted semijoin as it is creating cycles with mapside joins
" + rs + " : " + ts);
-            }
           }
         }
       }
@@ -895,6 +895,10 @@ public class TezCompiler extends TaskCompiler {
         long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
         if (expectedEntries == -1 || expectedEntries >
                 pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
+          if (sjInfo.getIsHint()) {
+            throw new SemanticException("Removing hinted semijoin due to lack to stats" +
+            " or exceeding max bloom filter entries");
+          }
           // Remove the semijoin optimization branch along with ALL the mappings
           // The parent GB2 has all the branches. Collect them and remove them.
           for (Operator<?> op : gbOp.getChildOperators()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/test/queries/clientpositive/semijoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q
index 71fa445..2a15344 100644
--- a/ql/src/test/queries/clientpositive/semijoin_hint.q
+++ b/ql/src/test/queries/clientpositive/semijoin_hint.q
@@ -49,51 +49,51 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str
= v.k
 -- Skip semijoin by using keyword "None" as argument
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str
= v.key1);
 
-EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.str = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1);
 
 -- This should NOT create a semijoin
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.value = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.value = v.key1);
 
 set hive.cbo.returnpath.hiveop=false;
 
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s
on (k.str = s.key1)
+explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small
s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1);
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1);
 
 -- Query which creates semijoin
 explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 -- Skip semijoin by using keyword "None" as argument
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str
= v.key1);
 
-EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.str = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1);
 
 -- This should NOT create a semijoin
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.value = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.value = v.key1);
 
 
 
 set hive.cbo.enable=false;
 
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s
on (k.str = s.key1)
+explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small
s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1);
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1);
 
 -- Query which creates semijoin
 explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 -- Skip semijoin by using keyword "None" as argument
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str
= v.key1);
 
-EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small
v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.str = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.str = v.key1);
 
 -- This should NOT create a semijoin
-explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v
on (k.value = v.key1);
+explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small
v on (k.value = v.key1);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
index ae9bf9b..76c985e 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
@@ -387,9 +387,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -552,9 +552,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -566,7 +566,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 5 <- Reducer 4 (BROADCAST_EDGE)
-        Map 6 <- Reducer 4 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
         Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE)
@@ -629,10 +628,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: k
-                  filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min)
AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter))))
(type: boolean)
+                  filterExpr: str is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                   Filter Operator
-                    predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min)
AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter))))
(type: boolean)
+                    predicate: str is not null (type: boolean)
                     Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                     Select Operator
                       expressions: str (type: string)
@@ -695,10 +694,6 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
                   value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
 
   Stage: Stage-0
     Fetch Operator
@@ -706,9 +701,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -833,9 +828,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.value = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -933,13 +928,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
+PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k
join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1390,9 +1385,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1553,9 +1548,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1567,7 +1562,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 5 <- Reducer 4 (BROADCAST_EDGE)
-        Map 6 <- Reducer 4 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
         Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE)
@@ -1630,10 +1624,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: k
-                  filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min)
AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter))))
(type: boolean)
+                  filterExpr: str is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                   Filter Operator
-                    predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min)
AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter))))
(type: boolean)
+                    predicate: str is not null (type: boolean)
                     Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                     Select Operator
                       expressions: str (type: string)
@@ -1694,10 +1688,6 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
                   value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
 
   Stage: Stage-0
     Fetch Operator
@@ -1705,9 +1695,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1830,9 +1820,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.value = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1928,13 +1918,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
+PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small s on (k.str = s.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k
join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
+        select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small
v on (d.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2353,9 +2343,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date
join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value
= i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2504,9 +2494,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date
k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2517,7 +2507,6 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 6 (BROADCAST_EDGE)
         Map 4 <- Reducer 6 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
@@ -2528,10 +2517,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: k
-                  filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min)
AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter))))
(type: boolean)
+                  filterExpr: str is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                   Filter Operator
-                    predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min)
AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter))))
(type: boolean)
+                    predicate: str is not null (type: boolean)
                     Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
                     Reduce Output Operator
                       key expressions: str (type: string)
@@ -2633,10 +2622,6 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
                   value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
 
   Stage: Stage-0
     Fetch Operator
@@ -2644,9 +2629,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.str = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2761,9 +2746,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join
srcpart_small v on (k.value = v.key1)
+POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k
join srcpart_small v on (k.value = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage


Mime
View raw message