Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 800FB18FAE for ; Fri, 18 Mar 2016 12:16:34 +0000 (UTC) Received: (qmail 66768 invoked by uid 500); 18 Mar 2016 12:16:34 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 66728 invoked by uid 500); 18 Mar 2016 12:16:34 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 66717 invoked by uid 99); 18 Mar 2016 12:16:34 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 18 Mar 2016 12:16:34 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 38833DFA0A; Fri, 18 Mar 2016 12:16:34 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jcamacho@apache.org To: commits@hive.apache.org Message-Id: <3bb2762ddb084179b302e445e494c370@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Date: Fri, 18 Mar 2016 12:16:34 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master a65917a3b -> 586c30441 HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/586c3044 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/586c3044 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/586c3044 Branch: refs/heads/master Commit: 586c3044176b722b40fbcfa55d82055ff40fd592 Parents: a65917a Author: Jesus Camacho Rodriguez Authored: Thu Mar 17 10:32:08 2016 +0100 Committer: Jesus Camacho Rodriguez Committed: Fri Mar 18 13:15:23 2016 +0100 ---------------------------------------------------------------------- .../ql/optimizer/calcite/HiveRelFactories.java | 5 - .../calcite/reloperators/HiveAggregate.java | 9 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 31 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 9 +- .../distinct_windowing_failure1.q | 20 + .../distinct_windowing_failure2.q | 22 + .../queries/clientpositive/distinct_windowing.q | 39 ++ .../distinct_windowing_failure1.q.out | 47 ++ .../distinct_windowing_failure2.q.out | 47 ++ .../clientpositive/distinct_windowing.q.out | 451 +++++++++++++++++++ 10 files changed, 655 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index 83205bc..971b446 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -24,7 +24,6 @@ import java.util.Set; import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.AggregateCall; @@ -193,12 +192,8 @@ public class HiveRelFactories { public RelNode createAggregate(RelNode child, boolean indicator, ImmutableBitSet groupSet, ImmutableList groupSets, List aggCalls) { - try { return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, indicator, groupSet, groupSets, aggCalls); - } catch (InvalidRelException e) { - throw new RuntimeException(e); - } } } http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java index 2548fa0..9cb62c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java @@ -25,7 +25,6 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; @@ -44,7 +43,7 @@ public class HiveAggregate extends Aggregate implements HiveRelNode { public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, boolean indicator, ImmutableBitSet groupSet, List groupSets, - List aggCalls) throws InvalidRelException { + List aggCalls) { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, indicator, groupSet, groupSets, aggCalls); } @@ -53,14 +52,8 @@ public class HiveAggregate extends Aggregate implements HiveRelNode { public Aggregate copy(RelTraitSet traitSet, RelNode input, boolean indicator, ImmutableBitSet groupSet, List groupSets, List aggCalls) { - try { return new HiveAggregate(getCluster(), traitSet, input, indicator, groupSet, groupSets, aggCalls); - } catch (InvalidRelException e) { - // Semantic error not possible. Must be a bug. Convert to - // internal error. - throw new AssertionError(e); - } } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f8860b7..fd2246b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -52,7 +52,6 @@ import org.apache.calcite.plan.hep.HepMatchOrder; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; @@ -2047,14 +2046,9 @@ public class CalcitePlanner extends SemanticAnalyzer { } RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); - HiveRelNode aggregateRel = null; - try { - aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, (transformedGroupSets!=null ? true:false), groupSet, transformedGroupSets, aggregateCalls); - } catch (InvalidRelException e) { - throw new SemanticException(e); - } return aggregateRel; } @@ -2231,7 +2225,7 @@ public class CalcitePlanner extends SemanticAnalyzer { } } - List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); + List grpByAstExprs = getGroupByForClause(qbp, detsClauseName); HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true @@ -3013,9 +3007,26 @@ public class CalcitePlanner extends SemanticAnalyzer { } // 8. Build Calcite Rel - RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + RelNode outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); - return selRel; + // 9. Handle select distinct as GBY if there exist windowing functions + if (selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) { + ImmutableBitSet groupSet = ImmutableBitSet.range(outputRel.getRowType().getFieldList().size()); + outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + outputRel, false, groupSet, null, new ArrayList()); + RowResolver groupByOutputRowResolver = new RowResolver(); + for (int i = 0; i < out_rwsch.getColumnInfos().size(); i++) { + ColumnInfo colInfo = out_rwsch.getColumnInfos().get(i); + ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(), + colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo); + } + relToHiveColNameCalcitePosMap.put(outputRel, + buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel)); + this.relToHiveRR.put(outputRel, groupByOutputRowResolver); + } + + return outputRel; } private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2dcb6d6..0845bc9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3756,7 +3756,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY * a,b,c. */ - static List getGroupByForClause(QBParseInfo parseInfo, String dest) { + List getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException { if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { ASTNode selectExprs = parseInfo.getSelForClause(dest); List result = new ArrayList(selectExprs == null ? 0 @@ -3774,6 +3774,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * If this is handled by Windowing then ignore it. */ if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) { + if (!isCBOExecuted()) { + throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing" + + " functions when CBO is off"); + } continue; } result.add(grpbyExpr); @@ -4161,6 +4165,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV); } + if (LOG.isDebugEnabled()) { LOG.debug("Created Select Plan row schema: " + out_rwsch.toString()); } @@ -5852,7 +5857,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return groupByOperatorInfo2; } - private boolean optimizeMapAggrGroupBy(String dest, QB qb) { + private boolean optimizeMapAggrGroupBy(String dest, QB qb) throws SemanticException { List grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); if ((grpByExprs != null) && !grpByExprs.isEmpty()) { return false; http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q new file mode 100644 index 0000000..39fe474 --- /dev/null +++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q @@ -0,0 +1,20 @@ +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +select count(distinct last_value(i) over ( partition by si order by i )) from over10k; http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q new file mode 100644 index 0000000..f07dc18 --- /dev/null +++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q @@ -0,0 +1,22 @@ +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +select distinct last_value(i) over ( partition by si order by i ), + distinct first_value(t) over ( partition by si order by i ) +from over10k ; http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientpositive/distinct_windowing.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/distinct_windowing.q b/ql/src/test/queries/clientpositive/distinct_windowing.q new file mode 100644 index 0000000..f6a00f0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/distinct_windowing.q @@ -0,0 +1,39 @@ +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain +select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10; + +select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10; + +explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10; + +select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10; + +explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50; + +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50; http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out new file mode 100644 index 0000000..18cf0c9 --- /dev/null +++ b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out @@ -0,0 +1,47 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +FAILED: SemanticException [Error 10002]: Line 3:68 Invalid column reference 'i': (possible column names are: t, si, i, b, f, d, bo, s, ts, dec, bin) http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out new file mode 100644 index 0000000..e370819 --- /dev/null +++ b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out @@ -0,0 +1,47 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +FAILED: ParseException line 4:7 cannot recognize input near 'distinct' 'first_value' '(' in selection target http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientpositive/distinct_windowing.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/distinct_windowing.q.out b/ql/src/test/results/clientpositive/distinct_windowing.q.out new file mode 100644 index 0000000..e6cde90 --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_windowing.q.out @@ -0,0 +1,451 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: first_value_window_0 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: PRECEDING(MAX)~ + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: first_value_window_0 (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +-2 +-1 +0 +1 +2 +3 +4 +6 +7 +8 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: PRECEDING(MAX)~ + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: PRECEDING(MAX)~ + window function definition + alias: first_value_window_1 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: PRECEDING(MAX)~ + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint) + Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 50 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 -2 +65536 2 +65536 9 +65536 12 +65536 13 +65536 18 +65536 22 +65536 23 +65536 27 +65536 37 +65536 39 +65536 42 +65536 48 +65536 55 +65536 56 +65536 58 +65536 61 +65536 69 +65536 71 +65536 73 +65536 75 +65536 78 +65536 80 +65536 83 +65536 84 +65536 88 +65536 94 +65536 104 +65536 107 +65536 108 +65536 111 +65536 114 +65536 118 +65536 119 +65536 121 +65537 4 +65537 8 +65537 9 +65537 11 +65537 18 +65537 22 +65537 25 +65537 36 +65537 51 +65537 53 +65537 54 +65537 55 +65537 56 +65537 57 +65537 59