From: omalley@apache.org
To: commits@hive.apache.org
Date: Fri, 20 Nov 2015 21:46:48 -0000
Subject: [08/12] hive git commit: HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39bd58b2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39bd58b2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39bd58b2

Branch: refs/heads/master-fixed
Commit: 39bd58b2f6c1614bc8f2eb9a28e0d039c7df2f73
Parents: 2d25cf2
Author: Jesus Camacho Rodriguez
Authored: Mon Nov 16 11:32:21 2015 +0100
Committer: Owen O'Malley
Committed: Fri Nov 20 13:46:23 2015 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  11 +
 .../test/resources/testconfiguration.properties |   1 +
 .../calcite/reloperators/HiveSortLimit.java     |  15 +-
 .../rules/HiveProjectSortTransposeRule.java     |  66 ++
 .../calcite/rules/HiveSortJoinReduceRule.java   | 135 +++
 .../calcite/rules/HiveSortMergeRule.java        |  78 ++
 .../rules/HiveSortProjectTransposeRule.java     |  79 ++
 .../calcite/rules/HiveSortRemoveRule.java       |  79 ++
 .../calcite/stats/HiveRelMdRowCount.java        |  18 +
 .../calcite/stats/HiveRelMdSelectivity.java     |  10 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  32 +-
 .../clientpositive/limit_join_transpose.q       |  99 +++
 .../clientpositive/tez_dynpart_hashjoin_3.q     |  24 +
 .../clientpositive/join32_lessSize.q.out        |  46 +-
 .../results/clientpositive/join_merging.q.out   |  34 +-
 .../clientpositive/limit_join_transpose.q.out   | 869 +++++++++++++++++++
 .../clientpositive/spark/join32_lessSize.q.out  | 122 +--
 .../clientpositive/spark/join_merging.q.out     |  76 +-
 .../results/clientpositive/subquery_notin.q.out | 328 ++++---
 .../subquery_notin_having.q.java1.7.out         |  94 +-
 .../clientpositive/tez/explainuser_1.q.out      | 171 ++-
 .../tez/tez_dynpart_hashjoin_3.q.out            | 200 +++++
 22 files changed, 2137 insertions(+), 450 deletions(-)
----------------------------------------------------------------------
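----------------------------------------------------------------------
The first hunk below adds the three HiveConf properties that gate the
optimization. A minimal sketch of enabling them programmatically,
assuming only the standard HiveConf setters and the ConfVars constants
introduced in this patch (the helper class name is hypothetical):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class EnableLimitJoinTranspose { // hypothetical helper
      public static HiveConf configure() {
        HiveConf conf = new HiveConf();
        // Turn the limit-through-outer-join rewrite on (off by default).
        conf.setBoolVar(ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE, true);
        // Keep a pushed limit only if it shrinks the outer input to at
        // most 10% of its estimated size...
        conf.setFloatVar(
            ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE, 0.1f);
        // ...and by at least 100 tuples; otherwise it is removed again.
        conf.setLongVar(
            ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES, 100L);
        return conf;
      }
    }
----------------------------------------------------------------------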
http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 4f8209a..09f3c3e 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1311,6 +1311,17 @@ public class HiveConf extends Configuration { "we are increasing the number of files possibly by a big margin. So, we merge aggressively."), HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false, + "Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" + + "input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" + + "to the outer input; to remain semantically correct, the limit is kept on top of the join too."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f, + "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" + + "size of the outer input of the join that we should get in order to apply the rule."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0, + "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" + + "number of tuples of the outer input of the join that you should get in order to apply the rule."), + HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false, "Whether to create a separate plan for skewed keys for the tables in the join.\n" + "This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b86dd19..0c5140c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -388,6 +388,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_insert_overwrite_local_directory_1.q,\ tez_dynpart_hashjoin_1.q,\ tez_dynpart_hashjoin_2.q,\ + tez_dynpart_hashjoin_3.q,\ tez_vector_dynpart_hashjoin_1.q,\ tez_vector_dynpart_hashjoin_2.q,\ tez_join_hash.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java index 82fdc0e..3077177 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java @@ -44,6 +44,8 @@ public class HiveSortLimit extends Sort implements HiveRelNode { // 4. 
This is used by ASTConverter after we are done with Calcite Planning private ImmutableMap mapOfInputRefToRexCall; + private boolean ruleCreated; + public HiveSortLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RelCollation collation, RexNode offset, RexNode fetch) { super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation, @@ -74,7 +76,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode { // TODO: can we blindly copy sort trait? What if inputs changed and we // are now sorting by different cols RelCollation canonizedCollation = traitSet.canonize(newCollation); - return new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); + HiveSortLimit sortLimit = + new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); + sortLimit.setRuleCreated(ruleCreated); + return sortLimit; } public RexNode getFetchExpr() { @@ -93,6 +98,14 @@ public class HiveSortLimit extends Sort implements HiveRelNode { public void implement(Implementor implementor) { } + public boolean isRuleCreated() { + return ruleCreated; + } + + public void setRuleCreated(boolean ruleCreated) { + this.ruleCreated = ruleCreated; + } + private static class HiveSortRelFactory implements RelFactories.SortFactory { @Override http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java new file mode 100644 index 0000000..9d74b19 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java @@ -0,0 +1,66 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +import com.google.common.collect.ImmutableList; + +public class HiveProjectSortTransposeRule extends RelOptRule { + + public static final HiveProjectSortTransposeRule INSTANCE = + new HiveProjectSortTransposeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveProjectSortTransposeRule. + */ + private HiveProjectSortTransposeRule() { + super( + operand( + HiveProject.class, + operand(HiveSortLimit.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveProject project = call.rel(0); + final HiveSortLimit sort = call.rel(1); + + // Determine mapping between project input and output fields. If sort + // relies on non-trivial expressions, we can't push. 
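----------------------------------------------------------------------
The pushability test this rule applies (and that the companion
HiveSortProjectTransposeRule further down repeats in the opposite
direction) reduces to an index check: every sort key must map to a
column the Project merely forwards, i.e. the projection must act as a
permutation on those fields. A standalone sketch with hypothetical
names, modelling the Project as an int array in which -1 marks a
computed expression such as key + 1:

    /**
     * Hypothetical illustration: projToInput[i] is the input column that
     * output field i of the Project simply forwards, or -1 if field i is
     * a non-trivial expression.
     */
    static boolean sortKeysPushable(int[] sortKeyIndexes, int[] projToInput) {
      for (int key : sortKeyIndexes) {
        if (projToInput[key] < 0) {
          return false; // sort relies on a computed expression; bail out
        }
      }
      return true; // every sort key remaps cleanly past the Project
    }
----------------------------------------------------------------------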
+ final Mappings.TargetMapping map = + RelOptUtil.permutation( + project.getProjects(), project.getInput().getRowType()).inverse(); + for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { + if (map.getTarget(fc.getFieldIndex()) < 0) { + return; + } + } + + // Create new collation + final RelCollation newCollation = + RelCollationTraitDef.INSTANCE.canonize( + RexUtil.apply(map, sort.getCollation())); + + // New operators + final RelNode newProject = project.copy(sort.getInput().getTraitSet(), + ImmutableList.of(sort.getInput())); + final HiveSortLimit newSort = sort.copy(newProject.getTraitSet(), + newProject, newCollation, sort.offset, sort.fetch); + + call.transformTo(newSort); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java new file mode 100644 index 0000000..d8b9c54 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexLiteral; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * Planner rule that pushes + * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit} + * past a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin}. 
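----------------------------------------------------------------------
In plan terms, the rule this javadoc introduces rewrites a limit over an
outer join by copying the limit onto the join's preserved input, while
keeping the original limit on top for correctness. A hedged before/after
sketch (operator names abbreviated, fetch=10 arbitrary):

    Before:                         After:
      HiveSortLimit(fetch=10)         HiveSortLimit(fetch=10)
        HiveJoin(left outer)            HiveJoin(left outer)
          Scan(lhs)                       HiveSortLimit(fetch=10)   <- synthetic
          Scan(rhs)                         Scan(lhs)
                                          Scan(rhs)
----------------------------------------------------------------------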
+ */ +public class HiveSortJoinReduceRule extends RelOptRule { + + public static final HiveSortJoinReduceRule INSTANCE = + new HiveSortJoinReduceRule(); + + //~ Constructors ----------------------------------------------------------- + + private HiveSortJoinReduceRule() { + super( + operand( + HiveSortLimit.class, + operand(HiveJoin.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + final HiveJoin join = call.rel(1); + + // If sort does not contain a limit operation, we bail out + if (!HiveCalciteUtil.limitRelNode(sortLimit)) { + return false; + } + + // 1) If join is not a left or right outer, we bail out + // 2) If any sort column is not part of the input where the + // sort is pushed, we bail out + RelNode reducedInput; + if (join.getJoinType() == JoinRelType.LEFT) { + reducedInput = join.getLeft(); + if (sortLimit.getCollation() != RelCollations.EMPTY) { + for (RelFieldCollation relFieldCollation + : sortLimit.getCollation().getFieldCollations()) { + if (relFieldCollation.getFieldIndex() + >= join.getLeft().getRowType().getFieldCount()) { + return false; + } + } + } + } else if (join.getJoinType() == JoinRelType.RIGHT) { + reducedInput = join.getRight(); + if (sortLimit.getCollation() != RelCollations.EMPTY) { + for (RelFieldCollation relFieldCollation + : sortLimit.getCollation().getFieldCollations()) { + if (relFieldCollation.getFieldIndex() + < join.getLeft().getRowType().getFieldCount()) { + return false; + } + } + } + } else { + return false; + } + + // Finally, if we do not reduce the input size, we bail out + if (RexLiteral.intValue(sortLimit.fetch) + >= RelMetadataQuery.getRowCount(reducedInput)) { + return false; + } + + return true; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + final HiveJoin join = call.rel(1); + RelNode inputLeft = join.getLeft(); + RelNode inputRight = join.getRight(); + + // We create a new sort operator on the corresponding input + if (join.getJoinType() == JoinRelType.LEFT) { + inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft, + sortLimit.getCollation(), sortLimit.offset, sortLimit.fetch); + ((HiveSortLimit) inputLeft).setRuleCreated(true); + } else { + // Adjust right collation + final RelCollation rightCollation = + RelCollationTraitDef.INSTANCE.canonize( + RelCollations.shift(sortLimit.getCollation(), + -join.getLeft().getRowType().getFieldCount())); + inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight, + rightCollation, sortLimit.offset, sortLimit.fetch); + ((HiveSortLimit) inputRight).setRuleCreated(true); + } + // We copy the join and the top sort operator + RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft, + inputRight, join.getJoinType(), join.isSemiJoinDone()); + result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(), + sortLimit.offset, sortLimit.fetch); + + call.transformTo(result); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java new file mode 100644 index 
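----------------------------------------------------------------------
HiveSortJoinReduceRule above only fires when every sort key belongs to
the preserved input of the outer join, and a join's output row type is
the left input's fields followed by the right input's, so both the side
test and the collation rebasing (RelCollations.shift by -leftFieldCount)
are plain index arithmetic. A standalone sketch with hypothetical names:

    // Join output columns [0, leftFieldCount) come from the left input,
    // columns [leftFieldCount, totalFieldCount) from the right input.
    static boolean keyComesFromLeft(int fieldIndex, int leftFieldCount) {
      return fieldIndex < leftFieldCount;
    }

    // Before the sort is copied onto the right input, each collation
    // index must be rebased into that input's coordinate space.
    static int rebaseToRightInput(int fieldIndex, int leftFieldCount) {
      return fieldIndex - leftFieldCount;
    }
----------------------------------------------------------------------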
0000000..ebb20c2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
@@ -0,0 +1,78 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * This rule will merge two HiveSortLimit operators.
+ *
+ * It is applied when the top match is a pure limit operation (no sorting).
+ *
+ * If the bottom operator is not synthetic and does not contain a limit,
+ * we currently bail out. Thus, we avoid a lot of unnecessary limit operations
+ * in the middle of the execution plan that could create performance regressions.
+ */
+public class HiveSortMergeRule extends RelOptRule {
+
+  public static final HiveSortMergeRule INSTANCE =
+      new HiveSortMergeRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a HiveSortMergeRule.
+   */
+  private HiveSortMergeRule() {
+    super(
+        operand(
+            HiveSortLimit.class,
+            operand(HiveSortLimit.class, any())));
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit topSortLimit = call.rel(0);
+    final HiveSortLimit bottomSortLimit = call.rel(1);
+
+    // If top operator is not a pure limit, we bail out
+    if (!HiveCalciteUtil.pureLimitRelNode(topSortLimit)) {
+      return false;
+    }
+
+    // If the bottom operator is not synthetic and it does not contain a limit,
+    // we will bail out; we do not want to end up with limits all over the tree
+    if (topSortLimit.isRuleCreated() && !bottomSortLimit.isRuleCreated() &&
+        bottomSortLimit.fetch == null) {
+      return false;
+    }
+
+    return true;
+  }
+
+  // implement RelOptRule
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit topSortLimit = call.rel(0);
+    final HiveSortLimit bottomSortLimit = call.rel(1);
+
+    // Lowest limit
+    final RexNode newLimit;
+    if (bottomSortLimit.fetch != null && RexLiteral.intValue(topSortLimit.fetch)
+        >= RexLiteral.intValue(bottomSortLimit.fetch)) {
+      newLimit = bottomSortLimit.fetch;
+    } else {
+      newLimit = topSortLimit.fetch;
+    }
+
+    final HiveSortLimit newSort = bottomSortLimit.copy(bottomSortLimit.getTraitSet(),
+        bottomSortLimit.getInput(), bottomSortLimit.collation, null, newLimit);
+
+    call.transformTo(newSort);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
new file mode 100644
index 0000000..debfbe0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
@@ -0,0 +1,79 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import
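----------------------------------------------------------------------
When HiveSortMergeRule above collapses two adjacent HiveSortLimit
operators, the survivor keeps the smaller fetch, since a limit applied
on top of a limit returns min(n, m) rows. The selection restated as a
plain-integer sketch (hypothetical signature; the real code compares
RexLiteral fetch expressions):

    static int mergedFetch(int topFetch, Integer bottomFetch) {
      if (bottomFetch != null && topFetch >= bottomFetch) {
        return bottomFetch; // bottom limit is already the tighter bound
      }
      return topFetch;      // otherwise the top limit is binding
    }
----------------------------------------------------------------------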
org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +import com.google.common.collect.ImmutableList; + +public class HiveSortProjectTransposeRule extends RelOptRule { + + public static final HiveSortProjectTransposeRule INSTANCE = + new HiveSortProjectTransposeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveSortProjectTransposeRule. + */ + private HiveSortProjectTransposeRule() { + super( + operand( + HiveSortLimit.class, + operand(HiveProject.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + + // If does not contain a limit operation, we bail out + if (!HiveCalciteUtil.limitRelNode(sortLimit)) { + return false; + } + + return true; + } + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sort = call.rel(0); + final HiveProject project = call.rel(1); + + // Determine mapping between project input and output fields. If sort + // relies on non-trivial expressions, we can't push. + final Mappings.TargetMapping map = + RelOptUtil.permutation( + project.getProjects(), project.getInput().getRowType()); + for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { + if (map.getTargetOpt(fc.getFieldIndex()) < 0) { + return; + } + } + + // Create new collation + final RelCollation newCollation = + RelCollationTraitDef.INSTANCE.canonize( + RexUtil.apply(map, sort.getCollation())); + + // New operators + final HiveSortLimit newSort = sort.copy(sort.getTraitSet().replace(newCollation), + project.getInput(), newCollation, sort.offset, sort.fetch); + final RelNode newProject = project.copy(sort.getTraitSet(), + ImmutableList.of(newSort)); + + call.transformTo(newProject); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java new file mode 100644 index 0000000..618c717 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Planner rule that removes
+ * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}.
+ */
+public class HiveSortRemoveRule extends RelOptRule {
+
+  protected final float reductionProportion;
+  protected final float reductionTuples;
+
+  //~ Constructors -----------------------------------------------------------
+
+  public HiveSortRemoveRule(float reductionProportion, long reductionTuples) {
+    this(operand(HiveSortLimit.class, any()), reductionProportion, reductionTuples);
+  }
+
+  private HiveSortRemoveRule(RelOptRuleOperand operand, float reductionProportion,
+      long reductionTuples) {
+    super(operand);
+    this.reductionProportion = reductionProportion;
+    this.reductionTuples = reductionTuples;
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+
+    // If it is not created by HiveSortJoinReduceRule, we cannot remove it
+    if (!sortLimit.isRuleCreated()) {
+      return false;
+    }
+
+    // Finally, if we do not reduce the input size enough, we bail out
+    int limit = RexLiteral.intValue(sortLimit.fetch);
+    Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput());
+    if (rowCount != null && limit <= reductionProportion * rowCount &&
+        rowCount - limit >= reductionTuples) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+
+    // We remove the limit operator
+    call.transformTo(sortLimit.getInput());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index 728c5aa..caf8978 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -30,6 +30,7 @@ import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdRowCount;
@@ -38,6 +39,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -87,6 +89,22 @@ public class HiveRelMdRowCount extends
RelMdRowCount { return super.getRowCount(rel); } + @Override + public Double getRowCount(Sort rel) { + final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput()); + if (rowCount != null && rel.fetch != null) { + final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset); + final int limit = RexLiteral.intValue(rel.fetch); + final Double offsetLimit = new Double(offset + limit); + // offsetLimit is smaller than rowCount of the input operator + // thus, we return the offsetLimit + if (offsetLimit < rowCount) { + return offsetLimit; + } + } + return rowCount; + } + static class PKFKRelationInfo { public final int fkSide; public final double ndvScalingFactor; http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 715f24f..a0eb83d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -61,6 +61,16 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { if (j.getJoinType().equals(JoinRelType.INNER)) { return computeInnerJoinSelectivity(j, predicate); + } else if (j.getJoinType().equals(JoinRelType.LEFT) || + j.getJoinType().equals(JoinRelType.RIGHT)) { + double left = RelMetadataQuery.getRowCount(j.getLeft()); + double right = RelMetadataQuery.getRowCount(j.getRight()); + double product = left * right; + double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate); + if (j.getJoinType().equals(JoinRelType.LEFT)) { + return Math.max(innerJoinSelectivity, left/product); + } + return Math.max(innerJoinSelectivity, right/product); } return 1.0; } http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index de67b54..bfe4633 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -148,8 +148,13 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import 
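----------------------------------------------------------------------
The HiveSortRemoveRule and metadata changes above cooperate: the rule
drops a synthetic limit unless it passes both reduction thresholds, a
Sort carrying a fetch caps its estimated row count at offset + limit,
and a left/right outer join gets a selectivity floor because every row
of the preserved side survives. The three formulas restated as a hedged,
standalone sketch (hypothetical helpers):

    // Mirrors HiveSortRemoveRule.matches(): keep the rule-created limit
    // only if it filters both proportionally and in absolute tuples.
    static boolean removeSyntheticLimit(int limit, double estimatedRows,
        float reductionProportion, long reductionTuples) {
      boolean reducesEnough = limit <= reductionProportion * estimatedRows
          && estimatedRows - limit >= reductionTuples;
      return !reducesEnough;
    }

    // Row count of a Sort with a LIMIT: never more than offset + fetch.
    static double sortRowCount(double inputRows, int offset, int fetch) {
      return Math.min(offset + fetch, inputRows);
    }

    // Outer-join selectivity floor: the preserved side's rows all
    // survive, so selectivity >= preservedRows / (leftRows * rightRows),
    // e.g. 1 / rightRows for a LEFT outer join.
    static double outerJoinSelectivity(double leftRows, double rightRows,
        double innerJoinSelectivity, boolean leftOuter) {
      double preserved = leftOuter ? leftRows : rightRows;
      return Math.max(innerJoinSelectivity, preserved / (leftRows * rightRows));
    }
----------------------------------------------------------------------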
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -953,16 +958,31 @@ public class CalcitePlanner extends SemanticAnalyzer { basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); } - // 1. Push Down Semi Joins + // 1. Push down limit through outer join + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { + // This should be a cost based decision, but till we enable the extended cost + // model, we will use the given value for the variable + final float reductionProportion = HiveConf.getFloatVar(conf, + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE); + final long reductionTuples = HiveConf.getLongVar(conf, + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES); + basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE, + HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE); + basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP, + new HiveSortRemoveRule(reductionProportion, reductionTuples), + HiveProjectSortTransposeRule.INSTANCE); + } + + // 2. Push Down Semi Joins basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. Add not null filters + // 3. Add not null filters if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); } - // 3. Constant propagation, common filter extraction, and PPD + // 4. Constant propagation, common filter extraction, and PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, @@ -976,12 +996,12 @@ public class CalcitePlanner extends SemanticAnalyzer { new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 4. Transitive inference & Partition Pruning + // 5. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 5. Projection Pruning + // 6. Projection Pruning HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, cluster, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, @@ -989,7 +1009,7 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 6. Rerun PPD through Project as column pruning would have introduced DT + // 7. 
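----------------------------------------------------------------------
The CalcitePlanner wiring above runs the new rules in two hepPlan
passes: the first merges limits, transposes them below projects, and
pushes them through outer joins; the second walks bottom-up, removing
synthetic limits that miss the thresholds and pulling projects back on
top. hepPlan is Hive's wrapper around Calcite's HepPlanner; a rough,
hedged equivalent in raw Calcite terms, with the Hive rule imports and
metadata-provider plumbing omitted, might look like:

    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;

    static RelNode pushLimitThroughOuterJoin(RelNode basePlan,
        float reductionProportion, long reductionTuples) {
      // Pass 1: merge limits, transpose below projects, push through joins.
      HepProgramBuilder push = new HepProgramBuilder();
      push.addRuleInstance(HiveSortMergeRule.INSTANCE);
      push.addRuleInstance(HiveSortProjectTransposeRule.INSTANCE);
      push.addRuleInstance(HiveSortJoinReduceRule.INSTANCE);

      // Pass 2, bottom-up: drop limits that do not pay off, restore projects.
      HepProgramBuilder cleanup = new HepProgramBuilder();
      cleanup.addMatchOrder(HepMatchOrder.BOTTOM_UP);
      cleanup.addRuleInstance(
          new HiveSortRemoveRule(reductionProportion, reductionTuples));
      cleanup.addRuleInstance(HiveProjectSortTransposeRule.INSTANCE);

      return run(cleanup, run(push, basePlan));
    }

    static RelNode run(HepProgramBuilder program, RelNode rel) {
      HepPlanner planner = new HepPlanner(program.build());
      planner.setRoot(rel);
      return planner.findBestExp();
    }
----------------------------------------------------------------------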
Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/test/queries/clientpositive/limit_join_transpose.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/limit_join_transpose.q b/ql/src/test/queries/clientpositive/limit_join_transpose.q new file mode 100644 index 0000000..bfc2378 --- /dev/null +++ b/ql/src/test/queries/clientpositive/limit_join_transpose.q @@ -0,0 +1,99 @@ +set hive.optimize.limitjointranspose=false; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + + +set hive.optimize.limitjointranspose=true; +set hive.optimize.limitjointranspose.reductionpercentage=0.0001f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + + +set hive.optimize.limitjointranspose.reductionpercentage=0.1f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +set hive.optimize.limitjointranspose.reductionpercentage=1f; +set hive.optimize.limitjointranspose.reductiontuples=0; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1; + +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1; http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q new file mode 100644 index 0000000..1994b40 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -0,0 +1,24 @@ +set hive.optimize.limitjointranspose=true; +set hive.optimize.limitjointranspose.reductionpercentage=0.1f; +set hive.optimize.limitjointranspose.reductiontuples=100; +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +explain +select a.* 
+from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1; + + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.exec.reducers.bytes.per.reducer=200000; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1; http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/test/results/clientpositive/join32_lessSize.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out index 5ea4024..8e71710 100644 --- a/ql/src/test/results/clientpositive/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -1920,22 +1920,22 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-7 + Stage-8 is a root stage Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-6 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_1:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_1:$hdt$_1:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1950,7 +1950,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2089,12 +2089,12 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_0:$hdt$_1:x] + /src1 [$hdt$_1:$hdt$_2:x] - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:z + $hdt$_0:z Fetch Operator limit: -1 Partition Description: @@ -2144,7 +2144,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_1:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2159,11 +2159,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2172,13 +2172,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col4 + Position of Big Table: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2313,7 +2313,7 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/test/results/clientpositive/join_merging.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out index b42bdc5..9ab9587 100644 --- a/ql/src/test/results/clientpositive/join_merging.q.out +++ b/ql/src/test/results/clientpositive/join_merging.q.out @@ -97,12 +97,12 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 and p1.p_size > p2.p_size + 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -153,17 +153,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -175,17 +168,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int) Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col3 (type: int) + expressions: _col2 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/39bd58b2/ql/src/test/results/clientpositive/limit_join_transpose.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/limit_join_transpose.q.out new file mode 100644 index 0000000..8717154 --- /dev/null +++ b/ql/src/test/results/clientpositive/limit_join_transpose.q.out @@ -0,0 +1,869 @@ +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + 
TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 +PREHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src 
+PREHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238 238 val_238 238 val_238
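The rewrite must be result-preserving: the returned row (238 val_238, repeated across the three join branches) has to be identical whether or not the Limit is pushed below the joins. A quick A/B check along these lines would make that visible (a hypothetical session, not part of the test file; LIMIT without ORDER BY is stable here only because the qtest src table is scanned deterministically):

    set hive.optimize.limitjointranspose=false;
    select *
    from src src1 right outer join (
      select * from src src2 left outer join src src3
      on src2.value = src3.value) src2
    on src1.key = src2.key
    limit 1;
    -- flip the flag and re-run; the same row should come back
    set hive.optimize.limitjointranspose=true;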
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col1 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
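In this plan the Limit reaches the scan itself: Stage-2 caps the input at one row (Limit, Number of rows: 1) before any join runs, and the cap is re-applied after the Left Outer Join in Stage-3, in Stage-4, and after the Right Outer Join in Stage-1. Retaining the Limit on top is what keeps the rewrite safe: every row of the preserved side of an outer join appears at least once in the output, so truncating that side to k rows still yields at least k result rows, and the retained top-level Limit trims any extras produced by join multiplicity. One plausible hand-written reading of the transformation (an informal equivalence, not optimizer output):

    select *
    from src src1 right outer join (
      select *
      from (select * from src limit 1) src2
      left outer join src src3
      on src2.value = src3.value) src2
    on src1.key = src2.key
    limit 1;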
+PREHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238 238 val_238 238 val_238
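The next explain adds order by src2.key, so a bare Limit can no longer be pushed on its own; the sort has to travel with the Limit so that the row kept below the joins is the one the global ordering would pick. A quick way to see whether the rule fired on any query is to look for a Limit operator underneath, rather than only above, the Join Operator (hypothetical session, simplified query):

    explain
    select * from src src1 right outer join src src2
    on src1.key = src2.key
    limit 1;
    -- with hive.optimize.limitjointranspose=true, expect a Limit inside a
    -- stage that feeds the join, plus the original Limit above it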
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-3 depends on stages: Stage-5
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
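Here Stage-5 sorts the scanned rows on the order-by key and truncates to a single row before either join (Reduce Output Operator with sort order: + followed by Limit), and the ordered Limit is re-applied in Stage-4 and again in Stage-2 above the final Right Outer Join. As an informal hand-written equivalence (again, a sketch rather than optimizer output):

    select *
    from src src1 right outer join (
      select src2.key, src2.value
      from (select key, value from src order by key limit 1) src2
      left outer join src src3
      on src2.value = src3.value) src2
    on src1.key = src2.key
    order by src2.key
    limit 1;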
+PREHOOK: query: select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0 val_0
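With the ordering pinned, the pushed-down plan returns the row with the smallest key (0 val_0 0 val_0), which is what the unoptimized plan must return as well. To cross-check against the baseline, the rewrite can be switched off and the same explain re-run (hypothetical session; one would expect a single sort-plus-Limit above the top Join Operator and the same result row):

    set hive.optimize.limitjointranspose=false;
    explain
    select *
    from src src1 right outer join (
      select src2.key, src2.value
      from src src2 left outer join src src3
      on src2.value = src3.value) src2
    on src1.key = src2.key
    order by src2.key
    limit 1;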