Mailing-List: contact commits-help@hive.apache.org; run by ezmlm
Precedence: bulk
Reply-To: hive-dev@hive.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: hashutosh@apache.org
To: commits@hive.apache.org
Message-Id: <e5be16be1625443abf920e019b31d536@git.apache.org>
Subject: hive git commit: HIVE-14228 : Better row count estimates for outer
 join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Date: Fri, 15 Jul 2016 04:31:15 +0000 (UTC)
archived-at: Fri, 15 Jul 2016 04:31:18 -0000

Repository: hive
Updated Branches:
  refs/heads/branch-2.1 b48850860 -> 0974ccf58


HIVE-14228 : Better row count estimates for outer join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0974ccf5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0974ccf5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0974ccf5

Branch: refs/heads/branch-2.1
Commit: 0974ccf5867ca0fce08200c8d5768aeabbf5e326
Parents: b488508
Author: Ashutosh Chauhan <hashutosh@apache.org>
Authored: Wed Jul 13 09:17:05 2016 -0700
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Thu Jul 14 21:28:25 2016 -0700

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java |  39 ++-
 .../clientpositive/annotate_stats_join.q        |  11 +
 .../clientpositive/annotate_stats_join.q.out    | 259 +++++++++++++++++
 .../spark/annotate_stats_join.q.out             | 291 +++++++++++++++++++
 .../clientpositive/tez/explainuser_1.q.out      |  16 +-
 5 files changed, 603 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 5625091..2d0417a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -61,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -1469,8 +1471,8 @@ public class StatsRulesProcFactory {
 
         // update join statistics
         stats.setColumnStats(outColStats);
-        long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
-        updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
+        long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
+        updateColStats(stats, newRowCount, jop, rowCountParents);
         jop.setStatistics(stats);
 
         if (isDebugEnabled) {
@@ -1644,7 +1646,7 @@ public class StatsRulesProcFactory {
         newNumRows = newrows;
       } else {
         // there is more than one FK
-        newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
+        newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals), jop);
       }
       return newNumRows;
     }
@@ -1764,7 +1766,7 @@ public class StatsRulesProcFactory {
       return result;
     }
 
-    private void updateStatsForJoinType(Statistics stats, long newNumRows,
+    private void updateColStats(Statistics stats, long newNumRows,
         CommonJoinOperator<? extends JoinDesc> jop,
         Map<Integer, Long> rowCountParents) {
 
@@ -1812,7 +1814,7 @@ public class StatsRulesProcFactory {
       stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize));
     }
 
-    private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+    private long computeNewRowCount(List<Long> rowCountParents, long denom, CommonJoinOperator<? extends JoinDesc> join) {
       double factor = 0.0d;
       long result = 1;
       long max = rowCountParents.get(0);
@@ -1838,6 +1840,33 @@ public class StatsRulesProcFactory {
 
       result = (long) (result * factor);
 
+      if (join.getConf().getConds().length == 1) {
+        JoinCondDesc joinCond = join.getConf().getConds()[0];
+        switch (joinCond.getType()) {
+          case JoinDesc.INNER_JOIN:
+            // only dealing with special join types here.
+            break;
+          case JoinDesc.LEFT_OUTER_JOIN :
+            // all rows from left side will be present in resultset
+            result = Math.max(rowCountParents.get(joinCond.getLeft()),result);
+            break;
+          case JoinDesc.RIGHT_OUTER_JOIN :
+            // all rows from right side will be present in resultset
+            result = Math.max(rowCountParents.get(joinCond.getRight()),result);
+            break;
+          case JoinDesc.FULL_OUTER_JOIN :
+            // all rows from both sides will be present in resultset
+            result = Math.max(StatsUtils.safeAdd(rowCountParents.get(joinCond.getRight()), rowCountParents.get(joinCond.getLeft())),result);
+            break;
+          case JoinDesc.LEFT_SEMI_JOIN :
+            // max # of rows = rows from left side
+            result = Math.min(rowCountParents.get(joinCond.getLeft()),result);
+            break;
+          default:
+            LOG.debug("Unhandled join type in stats estimation: " + joinCond.getType());
+            break;
+        }
+      }
       return result;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/queries/clientpositive/annotate_stats_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_join.q b/ql/src/test/queries/clientpositive/annotate_stats_join.q
index bd5f642..015c647 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_join.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_join.q
@@ -68,3 +68,14 @@ explain select * from emp e join dept d  on (e.deptid = d.deptid) join loc l on
 -- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1
 explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
 
+-- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;

http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
index 223a7ce..4398f1b 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
@@ -687,3 +687,262 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emp
+            Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: lastname (type: string), deptid (type: int), locid (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col2 (type: int)
+          TableScan
+            alias: dept
+            Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: deptid (type: int), deptname (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col1 (type: string), _col0 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string), _col1 (type: int)
+            1 _col1 (type: string), _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emp
+            Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (lastname is not null and deptid is not null) (type: boolean)
+              Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                  Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col2 (type: int)
+          TableScan
+            alias: dept
+            Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (deptid is not null and deptname is not null) (type: boolean)
+              Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: deptname (type: string), deptid (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                    Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          keys:
+            0 _col0 (type: string), _col1 (type: int)
+            1 _col0 (type: string), _col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emp
+            Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: lastname (type: string), deptid (type: int), locid (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col2 (type: int)
+          TableScan
+            alias: dept
+            Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: deptid (type: int), deptname (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col1 (type: string), _col0 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string), _col1 (type: int)
+            1 _col1 (type: string), _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emp
+            Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: lastname (type: string), deptid (type: int), locid (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col2 (type: int)
+          TableScan
+            alias: dept
+            Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: deptid (type: int), deptname (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col1 (type: string), _col0 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string), _col1 (type: int)
+            1 _col1 (type: string), _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 2a42b3c..30d10f7 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -749,3 +749,294 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: emp
+                  Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                      Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: int)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: dept
+                  Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: deptid (type: int), deptname (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string), _col0 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: string), _col1 (type: int)
+                  1 _col1 (type: string), _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: emp
+                  Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (lastname is not null and deptid is not null) (type: boolean)
+                    Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                        Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: int)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: dept
+                  Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (deptid is not null and deptname is not null) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: deptname (type: string), deptid (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: string), _col1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                          Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: string), _col1 (type: int)
+                  1 _col0 (type: string), _col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: emp
+                  Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                      Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: int)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: dept
+                  Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: deptid (type: int), deptname (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string), _col0 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                keys:
+                  0 _col0 (type: string), _col1 (type: int)
+                  1 _col1 (type: string), _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: emp
+                  Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                      Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: int)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: dept
+                  Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: deptid (type: int), deptname (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string), _col0 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string), _col1 (type: int)
+                  1 _col1 (type: string), _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index d4b29c6..e7bd381 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -1569,9 +1569,9 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=11 width=4)
+        Select Operator [SEL_11] (rows=9 width=4)
           Output:["_col0"]
-          Merge Join Operator [MERGEJOIN_17] (rows=11 width=4)
+          Merge Join Operator [MERGEJOIN_17] (rows=9 width=4)
             Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"]
           <-Map 1 [SIMPLE_EDGE]
             SHUFFLE [RS_8]
@@ -1849,7 +1849,7 @@ Stage-0
           Output:["_col0","_col1"]
           Filter Operator [FIL_12] (rows=1 width=269)
             predicate:_col3 is null
-            Merge Join Operator [MERGEJOIN_17] (rows=193 width=269)
+            Merge Join Operator [MERGEJOIN_17] (rows=500 width=269)
               Conds:RS_9._col1=RS_10._col1(Left Outer),Output:["_col0","_col1","_col3"]
             <-Map 1 [SIMPLE_EDGE]
               SHUFFLE [RS_9]
@@ -1911,7 +1911,7 @@ Stage-0
           Output:["_col0","_col1"]
           Filter Operator [FIL_12] (rows=1 width=265)
             predicate:_col3 is null
-            Merge Join Operator [MERGEJOIN_17] (rows=1 width=265)
+            Merge Join Operator [MERGEJOIN_17] (rows=250 width=265)
               Conds:RS_9._col0, _col1=RS_10._col1, _col0(Left Outer),Output:["_col0","_col1","_col3"]
             <-Map 4 [SIMPLE_EDGE]
               SHUFFLE [RS_10]
@@ -2341,7 +2341,7 @@ Stage-0
               Output:["_col0","_col1"]
               Filter Operator [FIL_21] (rows=1 width=265)
                 predicate:_col3 is null
-                Merge Join Operator [MERGEJOIN_29] (rows=404 width=265)
+                Merge Join Operator [MERGEJOIN_29] (rows=500 width=265)
                   Conds:RS_18._col0=RS_19._col0(Left Outer),Output:["_col0","_col1","_col3"]
                 <-Map 7 [SIMPLE_EDGE]
                   SHUFFLE [RS_19]
@@ -2413,7 +2413,7 @@ Stage-0
           Output:["_col0","_col1","_col2"]
           Filter Operator [FIL_20] (rows=1 width=344)
             predicate:_col4 is null
-            Merge Join Operator [MERGEJOIN_27] (rows=1 width=344)
+            Merge Join Operator [MERGEJOIN_27] (rows=26 width=344)
               Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4"]
             <-Map 6 [SIMPLE_EDGE]
               SHUFFLE [RS_18]
@@ -2491,7 +2491,7 @@ Stage-0
               Output:["_col0","_col1"]
               Filter Operator [FIL_31] (rows=1 width=133)
                 predicate:_col3 is null
-                Merge Join Operator [MERGEJOIN_41] (rows=1 width=133)
+                Merge Join Operator [MERGEJOIN_41] (rows=26 width=133)
                   Conds:RS_28.UDFToDouble(_col1)=RS_29._col0(Left Outer),Output:["_col0","_col1","_col3"]
                 <-Reducer 2 [SIMPLE_EDGE]
                   SHUFFLE [RS_28]
@@ -2583,7 +2583,7 @@ Stage-0
               Output:["_col0","_col1"]
               Filter Operator [FIL_33] (rows=1 width=204)
                 predicate:_col3 is null
-                Merge Join Operator [MERGEJOIN_42] (rows=1 width=204)
+                Merge Join Operator [MERGEJOIN_42] (rows=5 width=204)
                   Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3"]
                 <-Reducer 10 [SIMPLE_EDGE]
                   SHUFFLE [RS_31]