Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 811F8200C4E for ; Thu, 23 Mar 2017 06:55:25 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 7E5D6160B97; Thu, 23 Mar 2017 05:55:25 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 9BC0B160B94 for ; Thu, 23 Mar 2017 06:55:24 +0100 (CET) Received: (qmail 78087 invoked by uid 500); 23 Mar 2017 05:55:23 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 77851 invoked by uid 99); 23 Mar 2017 05:55:23 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 23 Mar 2017 05:55:23 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id CB90DE967A; Thu, 23 Mar 2017 05:55:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hashutosh@apache.org To: commits@hive.apache.org Date: Thu, 23 Mar 2017 05:55:25 -0000 Message-Id: <5d3f9a531b974875941cfaad2b25c1db@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [4/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan) archived-at: Thu, 23 Mar 2017 05:55:25 -0000 HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/112cbd19 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/112cbd19 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/112cbd19 Branch: refs/heads/master Commit: 112cbd19c96bbe298aa371e82ac867caca189b15 Parents: 9c692a5 Author: Vineet Garg Authored: Wed Mar 22 22:54:25 2017 -0700 Committer: Ashutosh Chauhan Committed: Wed Mar 22 22:54:25 2017 -0700 ---------------------------------------------------------------------- .../calcite/rules/HiveSubQueryRemoveRule.java | 38 +- .../subquery_scalar_corr_multi_rows.q | 2 + .../subquery_scalar_corr_multi_rows.q.out | 5 + .../clientpositive/llap/subquery_scalar.q.out | 1392 ++++++++++++++---- .../results/clientpositive/perf/query1.q.out | 248 ++-- .../results/clientpositive/perf/query30.q.out | 341 +++-- .../results/clientpositive/perf/query6.q.out | 353 +++-- .../results/clientpositive/perf/query81.q.out | 341 +++-- 8 files changed, 1859 insertions(+), 861 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 76e0780..7c96f3d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -164,6 +164,25 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: + builder.push(e.rel); + // returns single row/column + builder.aggregate(builder.groupKey(), + builder.count(false, "cnt")); + + SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); + + // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer + // ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + builder.call(countCheck, builder.field("cnt")), + builder.literal(1))); + if( !variablesSet.isEmpty()) + { + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } + else + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); if(isCorrScalarAgg) { // Transformation : // Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side. @@ -193,26 +212,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ //Transformation is to left join for correlated predicates and inner join otherwise, // but do a count on inner side before that to make sure it generates atmost 1 row. - builder.push(e.rel); - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt")); - - SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); - // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - builder.call(countCheck, builder.field("cnt")), - builder.literal(1))); - - if( !variablesSet.isEmpty()) - { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } - else - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); builder.push(e.rel); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); offset++; http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q b/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q new file mode 100644 index 0000000..e9ea703 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q @@ -0,0 +1,2 @@ +-- inner query produces more than one row +select * from part where p_size > (select count(*) from part p where p.p_mfgr = part.p_mfgr group by p_type); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out b/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out new file mode 100644 index 0000000..3235048 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: select * from part where p_size > (select count(*) from part p where p.p_mfgr = part.p_mfgr group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask