Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 52FF8200C14 for ; Tue, 7 Feb 2017 21:58:45 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 51C0F160B70; Tue, 7 Feb 2017 20:58:45 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 70033160B6F for ; Tue, 7 Feb 2017 21:58:44 +0100 (CET) Received: (qmail 25933 invoked by uid 500); 7 Feb 2017 20:58:34 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 22934 invoked by uid 99); 7 Feb 2017 20:58:31 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 07 Feb 2017 20:58:31 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 86FC0E0394; Tue, 7 Feb 2017 20:58:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Date: Tue, 07 Feb 2017 20:59:32 -0000 Message-Id: <18afa3797e5c4653b4ea9b004a999996@git.apache.org> In-Reply-To: <07dd24f853f44e5796f540969e72a3f9@git.apache.org> References: <07dd24f853f44e5796f540969e72a3f9@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [64/70] [abbrv] hive git commit: HIVE-15808: Remove semijoin reduction branch if it is on bigtable along with hash join (Deepak Jaiswal, reviewed by Jason Dere) archived-at: Tue, 07 Feb 2017 20:58:45 -0000 HIVE-15808: Remove semijoin reduction branch if it is on bigtable along with hash join (Deepak Jaiswal, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f6cdbc87 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f6cdbc87 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f6cdbc87 Branch: refs/heads/hive-14535 Commit: f6cdbc87955aa5cdb83f174a73db9a7d8071f78b Parents: 3ed7dc2 Author: Gunther Hagleitner Authored: Tue Feb 7 11:11:09 2017 -0800 Committer: Gunther Hagleitner Committed: Tue Feb 7 11:11:09 2017 -0800 ---------------------------------------------------------------------- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 64 +++++++++++--------- .../hadoop/hive/ql/parse/GenTezUtils.java | 8 +-- 2 files changed, 39 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f6cdbc87/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 0f9e86b..e3b293a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -775,51 +775,57 @@ public class ConvertJoinMapJoin implements NodeProcessor { return mapJoinOp; } - // Remove any semijoin branch associated with mapjoin's parent's operator - // pipeline which can cause a cycle after mapjoin optimization. + // Remove any semijoin branch associated with hashjoin's parent's operator + // pipeline which can cause a cycle after hashjoin optimization. private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException { - boolean semiJoinCycle = false; - ReduceSinkOperator rs = null; - TableScanOperator ts = null; + Map semiJoinMap = + new HashMap(); for (Operator op : parentSelectOpOfBigTable.getChildOperators()) { if (!(op instanceof SelectOperator)) { continue; } - while (op.getChildOperators().size() > 0 ) { + while (op.getChildOperators().size() > 0) { op = op.getChildOperators().get(0); - if (!(op instanceof ReduceSinkOperator)) { - continue; - } - rs = (ReduceSinkOperator) op; - ts = parseContext.getRsOpToTsOpMap().get(rs); - if (ts == null) { + } + + // If not ReduceSink Op, skip + if (!(op instanceof ReduceSinkOperator)) { + continue; + } + + ReduceSinkOperator rs = (ReduceSinkOperator) op; + TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs); + if (ts == null) { + // skip, no semijoin branch + continue; + } + + // Found a semijoin branch. + for (Operator parent : mapjoinOp.getParentOperators()) { + if (!(parent instanceof ReduceSinkOperator)) { continue; } - for (Operator parent : mapjoinOp.getParentOperators()) { - if (!(parent instanceof ReduceSinkOperator)) { - continue; - } - Set tsOps = OperatorUtils.findOperatorsUpstream(parent, - TableScanOperator.class); - for (TableScanOperator parentTS : tsOps) { - // If the parent is same as the ts, then we have a cycle. - if (ts == parentTS) { - semiJoinCycle = true; - break; - } + Set tsOps = OperatorUtils.findOperatorsUpstream(parent, + TableScanOperator.class); + for (TableScanOperator parentTS : tsOps) { + // If the parent is same as the ts, then we have a cycle. + if (ts == parentTS) { + semiJoinMap.put(rs, ts); + break; } } } } - - // By design there can be atmost 1 such cycle. - if (semiJoinCycle) { - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(parseContext, rs, ts); + if (semiJoinMap.size() > 0) { + for (ReduceSinkOperator rs : semiJoinMap.keySet()) { + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(parseContext, rs, + semiJoinMap.get(rs)); + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/f6cdbc87/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index aee74ad..7f5fdff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -265,7 +265,6 @@ public class GenTezUtils { for (ReduceSinkOperator rs : rsOpToTsOpMap.keySet()) { if (rsOpToTsOpMap.get(rs) == orig) { rsOpToTsOpMap.put(rs, (TableScanOperator) newRoot); - break; } } } @@ -569,7 +568,8 @@ public class GenTezUtils { TypeInfoFactory.booleanTypeInfo, Boolean.TRUE); DynamicValuePredicateContext filterDynamicValuePredicatesCollection = new DynamicValuePredicateContext(); - collectDynamicValuePredicates(((FilterOperator)(ts.getChildOperators().get(0))).getConf().getPredicate(), + FilterDesc filterDesc = ((FilterOperator)(ts.getChildOperators().get(0))).getConf(); + collectDynamicValuePredicates(filterDesc.getPredicate(), filterDynamicValuePredicatesCollection); for (ExprNodeDesc nodeToRemove : filterDynamicValuePredicatesCollection .childParentMapping.keySet()) { @@ -594,8 +594,8 @@ public class GenTezUtils { ExprNodeDesc nodeParent = filterDynamicValuePredicatesCollection .childParentMapping.get(nodeToRemove); if (nodeParent == null) { - // This was the only predicate, set filter expression to null - ts.getConf().setFilterExpr(null); + // This was the only predicate, set filter expression to const + filterDesc.setPredicate(constNode); } else { int i = nodeParent.getChildren().indexOf(nodeToRemove); nodeParent.getChildren().remove(i);