Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8002918041 for ; Mon, 17 Aug 2015 22:00:43 +0000 (UTC) Received: (qmail 83502 invoked by uid 500); 17 Aug 2015 22:00:32 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 83357 invoked by uid 500); 17 Aug 2015 22:00:32 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 82837 invoked by uid 99); 17 Aug 2015 22:00:32 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 17 Aug 2015 22:00:32 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 14E52E0441; Mon, 17 Aug 2015 22:00:32 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Date: Mon, 17 Aug 2015 22:00:46 -0000 Message-Id: <2a66e6910a8d4c68b95d8cdd7bfa7436@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [16/50] [abbrv] hive git commit: HIVE-11416: CBO: Calcite Operator To Hive Operator (Calcite Return Path): Groupby Optimizer assumes the schema can match after removing RS and GBY (reviewed by Jesus Camacho Rodriguez) HIVE-11416: CBO: Calcite Operator To Hive Operator (Calcite Return Path): Groupby Optimizer assumes the schema can match after removing RS and GBY (reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/763cb02b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/763cb02b 
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/763cb02b Branch: refs/heads/hbase-metastore Commit: 763cb02b5eafb0ecd3fd0eb512636a1b092df671 Parents: 57ba795 Author: Pengcheng Xiong Authored: Tue Aug 11 11:26:48 2015 -0700 Committer: Pengcheng Xiong Committed: Tue Aug 11 11:26:48 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/Operator.java | 25 --------- .../hive/ql/optimizer/GroupByOptimizer.java | 58 +++++++++++++++++++- 2 files changed, 57 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/763cb02b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 0f02737..acbe504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -769,31 +769,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C } } - // Remove the operators till a certain depth. - // Return true if the remove was successful, false otherwise - public boolean removeChildren(int depth) { - Operator<? extends OperatorDesc> currOp = this; - for (int i = 0; i < depth; i++) { - // If there are more than 1 children at any level, don't do anything - if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) || - (currOp.getChildOperators().size() > 1)) { - return false; - } - currOp = currOp.getChildOperators().get(0); - } - - setChildOperators(currOp.getChildOperators()); - - List<Operator<? extends OperatorDesc>> parentOps = - new ArrayList<Operator<? extends OperatorDesc>>(); - parentOps.add(this); - - for (Operator<? extends OperatorDesc> op : currOp.getChildOperators()) { - op.setParentOperators(parentOps); - } - return true; - } - /** * Replace one parent with another at the same position. 
Chilren of the new * parent are not updated http://git-wip-us.apache.org/repos/asf/hive/blob/763cb02b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index af54286..ce3f59a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,9 +32,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -520,12 +524,64 @@ public class GroupByOptimizer implements Transform { return; } - if (groupByOp.removeChildren(depth)) { + if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file groupByOp.setUseBucketizedHiveInputFormat(true); groupByOp.getConf().setMode(GroupByDesc.Mode.FINAL); } } + + // Remove the operators till a certain depth. 
+ // Return true if the remove was successful, false otherwise + public boolean removeChildren(Operator<? extends OperatorDesc> currOp, int depth) { + Operator<? extends OperatorDesc> inputOp = currOp; + for (int i = 0; i < depth; i++) { + // If there are more than 1 children at any level, don't do anything + if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) + || (currOp.getChildOperators().size() > 1)) { + return false; + } + currOp = currOp.getChildOperators().get(0); + } + + // add selectOp to match the schema + // after that, inputOp is the parent of selOp. + for (Operator<? extends OperatorDesc> op : inputOp.getChildOperators()) { + op.getParentOperators().clear(); + } + inputOp.getChildOperators().clear(); + Operator<? extends OperatorDesc> selOp = genOutputSelectForGroupBy(inputOp, currOp); + + // update the childOp of selectOp + selOp.setChildOperators(currOp.getChildOperators()); + + // update the parentOp + for (Operator<? extends OperatorDesc> op : currOp.getChildOperators()) { + op.replaceParent(currOp, selOp); + } + return true; + } + + private Operator<? extends OperatorDesc> genOutputSelectForGroupBy( + Operator<? extends OperatorDesc> parentOp, Operator<? extends OperatorDesc> currOp) { + assert (parentOp.getSchema().getSignature().size() == currOp.getSchema().getSignature().size()); + Iterator<ColumnInfo> pIter = parentOp.getSchema().getSignature().iterator(); + Iterator<ColumnInfo> cIter = currOp.getSchema().getSignature().iterator(); + List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>(); + List<String> colName = new ArrayList<String>(); + Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>(); + while (pIter.hasNext()) { + ColumnInfo pInfo = pIter.next(); + ColumnInfo cInfo = cIter.next(); + ExprNodeDesc column = new ExprNodeColumnDesc(pInfo.getType(), pInfo.getInternalName(), + pInfo.getTabAlias(), pInfo.getIsVirtualCol(), pInfo.isSkewedCol()); + columns.add(column); + colName.add(cInfo.getInternalName()); + columnExprMap.put(cInfo.getInternalName(), column); + } + return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(currOp + .getSchema().getSignature()), columnExprMap, parentOp); + } } /**