Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 8DBEB200B50 for ; Fri, 15 Jul 2016 06:31:18 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 8C574160A85; Fri, 15 Jul 2016 04:31:18 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E68B4160A63 for ; Fri, 15 Jul 2016 06:31:16 +0200 (CEST) Received: (qmail 28297 invoked by uid 500); 15 Jul 2016 04:31:15 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 28286 invoked by uid 99); 15 Jul 2016 04:31:15 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Jul 2016 04:31:15 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B46E5E964E; Fri, 15 Jul 2016 04:31:15 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hashutosh@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-14228 : Better row count estimates for outer join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez) Date: Fri, 15 Jul 2016 04:31:15 +0000 (UTC) archived-at: Fri, 15 Jul 2016 04:31:18 -0000 Repository: hive Updated Branches: refs/heads/branch-2.1 b48850860 -> 0974ccf58 HIVE-14228 : Better row count estimates for outer join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0974ccf5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0974ccf5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0974ccf5 Branch: refs/heads/branch-2.1 Commit: 0974ccf5867ca0fce08200c8d5768aeabbf5e326 Parents: b488508 Author: Ashutosh Chauhan Authored: Wed Jul 13 09:17:05 2016 -0700 Committer: Ashutosh Chauhan Committed: Thu Jul 14 21:28:25 2016 -0700 ---------------------------------------------------------------------- .../stats/annotation/StatsRulesProcFactory.java | 39 ++- .../clientpositive/annotate_stats_join.q | 11 + .../clientpositive/annotate_stats_join.q.out | 259 +++++++++++++++++ .../spark/annotate_stats_join.q.out | 291 +++++++++++++++++++ .../clientpositive/tez/explainuser_1.q.out | 16 +- 5 files changed, 603 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 5625091..2d0417a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; @@ -61,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -1469,8 +1471,8 @@ public class StatsRulesProcFactory { // update join statistics stats.setColumnStats(outColStats); - long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom); - updateStatsForJoinType(stats, newRowCount, jop, rowCountParents); + long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop); + updateColStats(stats, newRowCount, jop, rowCountParents); jop.setStatistics(stats); if (isDebugEnabled) { @@ -1644,7 +1646,7 @@ public class StatsRulesProcFactory { newNumRows = newrows; } else { // there is more than one FK - newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); + newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals), jop); } return newNumRows; } @@ -1764,7 +1766,7 @@ public class StatsRulesProcFactory { return result; } - private void updateStatsForJoinType(Statistics stats, long newNumRows, + private void updateColStats(Statistics stats, long newNumRows, CommonJoinOperator jop, Map rowCountParents) { @@ -1812,7 +1814,7 @@ public class StatsRulesProcFactory { stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } - private long computeNewRowCount(List rowCountParents, long denom) { + private long computeNewRowCount(List rowCountParents, long denom, CommonJoinOperator join) { double factor = 0.0d; long result = 1; long max = rowCountParents.get(0); @@ -1838,6 +1840,33 @@ public class StatsRulesProcFactory { result = (long) (result * factor); + if (join.getConf().getConds().length == 1) { + JoinCondDesc joinCond = join.getConf().getConds()[0]; + switch (joinCond.getType()) { + case JoinDesc.INNER_JOIN: + // only dealing with special join types here. + break; + case JoinDesc.LEFT_OUTER_JOIN : + // all rows from left side will be present in resultset + result = Math.max(rowCountParents.get(joinCond.getLeft()),result); + break; + case JoinDesc.RIGHT_OUTER_JOIN : + // all rows from right side will be present in resultset + result = Math.max(rowCountParents.get(joinCond.getRight()),result); + break; + case JoinDesc.FULL_OUTER_JOIN : + // all rows from both side will be present in resultset + result = Math.max(StatsUtils.safeAdd(rowCountParents.get(joinCond.getRight()), rowCountParents.get(joinCond.getLeft())),result); + break; + case JoinDesc.LEFT_SEMI_JOIN : + // max # of rows = rows from left side + result = Math.min(rowCountParents.get(joinCond.getLeft()),result); + break; + default: + LOG.debug("Unhandled join type in stats estimation: " + joinCond.getType()); + break; + } + } return result; } http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/queries/clientpositive/annotate_stats_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/annotate_stats_join.q b/ql/src/test/queries/clientpositive/annotate_stats_join.q index bd5f642..015c647 100644 --- a/ql/src/test/queries/clientpositive/annotate_stats_join.q +++ b/ql/src/test/queries/clientpositive/annotate_stats_join.q @@ -68,3 +68,14 @@ explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on -- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state); +-- left outer join +explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname; + +-- left semi join +explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname; + +-- right outer join +explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname; + +-- full outer join +explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname; http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/annotate_stats_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index 223a7ce..4398f1b 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -687,3 +687,262 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: -- left outer join +explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- left outer join +explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- left semi join +explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- left semi join +explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (lastname is not null and deptid is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptname (type: string), deptid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- right outer join +explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- right outer join +explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- full outer join +explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- full outer join +explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 2a42b3c..30d10f7 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -749,3 +749,294 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: -- left outer join +explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- left outer join +explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- left semi join +explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- left semi join +explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (lastname is not null and deptid is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptname (type: string), deptid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- right outer join +explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- right outer join +explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- full outer join +explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- full outer join +explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/0974ccf5/ql/src/test/results/clientpositive/tez/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index d4b29c6..e7bd381 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -1569,9 +1569,9 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=11 width=4) + Select Operator [SEL_11] (rows=9 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_17] (rows=11 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=9 width=4) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_8] @@ -1849,7 +1849,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_12] (rows=1 width=269) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_17] (rows=193 width=269) + Merge Join Operator [MERGEJOIN_17] (rows=500 width=269) Conds:RS_9._col1=RS_10._col1(Left Outer),Output:["_col0","_col1","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] @@ -1911,7 +1911,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_12] (rows=1 width=265) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_17] (rows=1 width=265) + Merge Join Operator [MERGEJOIN_17] (rows=250 width=265) Conds:RS_9._col0, _col1=RS_10._col1, _col0(Left Outer),Output:["_col0","_col1","_col3"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_10] @@ -2341,7 +2341,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_21] (rows=1 width=265) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_29] (rows=404 width=265) + Merge Join Operator [MERGEJOIN_29] (rows=500 width=265) Conds:RS_18._col0=RS_19._col0(Left Outer),Output:["_col0","_col1","_col3"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_19] @@ -2413,7 +2413,7 @@ Stage-0 Output:["_col0","_col1","_col2"] Filter Operator [FIL_20] (rows=1 width=344) predicate:_col4 is null - Merge Join Operator [MERGEJOIN_27] (rows=1 width=344) + Merge Join Operator [MERGEJOIN_27] (rows=26 width=344) Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4"] <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_18] @@ -2491,7 +2491,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_31] (rows=1 width=133) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_41] (rows=1 width=133) + Merge Join Operator [MERGEJOIN_41] (rows=26 width=133) Conds:RS_28.UDFToDouble(_col1)=RS_29._col0(Left Outer),Output:["_col0","_col1","_col3"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_28] @@ -2583,7 +2583,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_33] (rows=1 width=204) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_42] (rows=1 width=204) + Merge Join Operator [MERGEJOIN_42] (rows=5 width=204) Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_31]