Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2E65218419 for ; Wed, 13 Jan 2016 09:35:45 +0000 (UTC) Received: (qmail 11193 invoked by uid 500); 13 Jan 2016 09:35:45 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 11151 invoked by uid 500); 13 Jan 2016 09:35:45 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 11140 invoked by uid 99); 13 Jan 2016 09:35:45 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 13 Jan 2016 09:35:44 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id CB961E03CE; Wed, 13 Jan 2016 09:35:44 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: harisankar@apache.org To: commits@hive.apache.org Message-Id: <591393d1c8c24ccf890a0b0238f06385@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-12640 : Allow StatsOptimizer to optimize the query for Constant GroupBy keys (Hari Subramaniyan, reviewed by Ashutosh Chauhan) Date: Wed, 13 Jan 2016 09:35:44 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master 9f1ff4f17 -> de30fe4e2 HIVE-12640 : Allow StatsOptimizer to optimize the query for Constant GroupBy keys (Hari Subramaniyan, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de30fe4e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de30fe4e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de30fe4e Branch: refs/heads/master Commit: de30fe4e2dd9d3aa62d522fbe340cce549f0d3f5 Parents: 9f1ff4f Author: Hari Subramaniyan Authored: Wed Jan 13 01:35:34 2016 -0800 Committer: Hari Subramaniyan Committed: Wed Jan 13 01:35:34 2016 -0800 ---------------------------------------------------------------------- .../hive/ql/optimizer/StatsOptimizer.java | 22 ++++++++-- .../clientpositive/metadata_only_queries.q | 6 +++ .../clientpositive/metadata_only_queries.q.out | 46 ++++++++++++++++++++ .../spark/metadata_only_queries.q.out | 46 ++++++++++++++++++++ .../tez/metadata_only_queries.q.out | 46 ++++++++++++++++++++ 5 files changed, 162 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 03c1c3f..03dcf9f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin; @@ -199,6 +200,21 @@ public class StatsOptimizer extends Transform { } } + private boolean hasNullOrConstantGbyKey(GroupByOperator gbyOp) { + GroupByDesc gbyDesc = gbyOp.getConf(); + // If the Group by operator has null key + if (gbyDesc.getOutputColumnNames().size() == + gbyDesc.getAggregators().size()) { + return true; + } + for (ExprNodeDesc en :gbyDesc.getKeys()) { + if (!(en instanceof ExprNodeConstantDesc)) { + return false; + } + } + return true; + } + @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { @@ -227,8 +243,7 @@ public class StatsOptimizer extends Transform { // Since we have done an exact match on TS-SEL-GBY-RS-GBY-(SEL)-FS // we need not to do any instanceof checks for following. GroupByOperator pgbyOp = (GroupByOperator)stack.get(2); - if (pgbyOp.getConf().getOutputColumnNames().size() != - pgbyOp.getConf().getAggregators().size()) { + if (!hasNullOrConstantGbyKey(pgbyOp)) { return null; } ReduceSinkOperator rsOp = (ReduceSinkOperator)stack.get(3); @@ -238,8 +253,7 @@ public class StatsOptimizer extends Transform { } GroupByOperator cgbyOp = (GroupByOperator)stack.get(4); - if (cgbyOp.getConf().getOutputColumnNames().size() != - cgbyOp.getConf().getAggregators().size()) { + if (!hasNullOrConstantGbyKey(cgbyOp)) { return null; } Operator last = (Operator) stack.get(5); http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/queries/clientpositive/metadata_only_queries.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q index bce121d..cc72bb3 100644 --- a/ql/src/test/queries/clientpositive/metadata_only_queries.q +++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -93,6 +93,12 @@ select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as thre explain select count(ts) from stats_tbl_part; +explain select count('1') from stats_tbl group by '1'; +select count('1') from stats_tbl group by '1'; + +explain select count('1') from stats_tbl_part group by '1'; +select count('1') from stats_tbl_part group by '1'; + drop table stats_tbl; drop table stats_tbl_part; http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 65a4dfa..2f782a9 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -620,6 +620,52 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +9999 +PREHOOK: query: explain select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +9489 PREHOOK: query: drop table stats_tbl PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_tbl http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 0d85f4e..359a9dd 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -650,6 +650,52 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +9999 +PREHOOK: query: explain select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +9489 PREHOOK: query: drop table stats_tbl PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_tbl http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index ab86ab0..14fbf0e 100644 --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -650,6 +650,52 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +9999 +PREHOOK: query: explain select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from stats_tbl_part group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from stats_tbl_part group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +9489 PREHOOK: query: drop table stats_tbl PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_tbl