hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Fri, 18 Mar 2016 12:16:34 GMT
Repository: hive
Updated Branches:
  refs/heads/master a65917a3b -> 586c30441


HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/586c3044
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/586c3044
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/586c3044

Branch: refs/heads/master
Commit: 586c3044176b722b40fbcfa55d82055ff40fd592
Parents: a65917a
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Mar 17 10:32:08 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Mar 18 13:15:23 2016 +0100

----------------------------------------------------------------------
 .../ql/optimizer/calcite/HiveRelFactories.java  |   5 -
 .../calcite/reloperators/HiveAggregate.java     |   9 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  31 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   9 +-
 .../distinct_windowing_failure1.q               |  20 +
 .../distinct_windowing_failure2.q               |  22 +
 .../queries/clientpositive/distinct_windowing.q |  39 ++
 .../distinct_windowing_failure1.q.out           |  47 ++
 .../distinct_windowing_failure2.q.out           |  47 ++
 .../clientpositive/distinct_windowing.q.out     | 451 +++++++++++++++++++
 10 files changed, 655 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index 83205bc..971b446 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -24,7 +24,6 @@ import java.util.Set;
 import org.apache.calcite.plan.Contexts;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelTraitSet;
-import org.apache.calcite.rel.InvalidRelException;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -193,12 +192,8 @@ public class HiveRelFactories {
     public RelNode createAggregate(RelNode child, boolean indicator,
             ImmutableBitSet groupSet, ImmutableList<ImmutableBitSet> groupSets,
             List<AggregateCall> aggCalls) {
-      try {
         return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, indicator,
                 groupSet, groupSets, aggCalls);
-      } catch (InvalidRelException e) {
-          throw new RuntimeException(e);
-      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
index 2548fa0..9cb62c8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -25,7 +25,6 @@ import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptCost;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelTraitSet;
-import org.apache.calcite.rel.InvalidRelException;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -44,7 +43,7 @@ public class HiveAggregate extends Aggregate implements HiveRelNode {
 
   public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
       boolean indicator, ImmutableBitSet groupSet, List<ImmutableBitSet> groupSets,
-      List<AggregateCall> aggCalls) throws InvalidRelException {
+      List<AggregateCall> aggCalls) {
     super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, indicator, groupSet,
             groupSets, aggCalls);
   }
@@ -53,14 +52,8 @@ public class HiveAggregate extends Aggregate implements HiveRelNode {
   public Aggregate copy(RelTraitSet traitSet, RelNode input,
           boolean indicator, ImmutableBitSet groupSet,
           List<ImmutableBitSet> groupSets, List<AggregateCall> aggCalls) {
-    try {
       return new HiveAggregate(getCluster(), traitSet, input, indicator, groupSet,
               groupSets, aggCalls);
-    } catch (InvalidRelException e) {
-      // Semantic error not possible. Must be a bug. Convert to
-      // internal error.
-      throw new AssertionError(e);
-    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f8860b7..fd2246b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -52,7 +52,6 @@ import org.apache.calcite.plan.hep.HepMatchOrder;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgram;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
-import org.apache.calcite.rel.InvalidRelException;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollationImpl;
 import org.apache.calcite.rel.RelCollations;
@@ -2047,14 +2046,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
       RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null);
 
-      HiveRelNode aggregateRel = null;
-      try {
-        aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+      HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
             gbInputRel, (transformedGroupSets!=null ? true:false), groupSet,
             transformedGroupSets, aggregateCalls);
-      } catch (InvalidRelException e) {
-        throw new SemanticException(e);
-      }
 
       return aggregateRel;
     }
@@ -2231,7 +2225,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
         }
       }
 
-      List<ASTNode> grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName);
+      List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
       HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
       boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false;
       boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true
@@ -3013,9 +3007,26 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
 
       // 8. Build Calcite Rel
-      RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
+      RelNode outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
 
-      return selRel;
+      // 9. Handle select distinct as GBY if there exist windowing functions
+      if (selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) {
+        ImmutableBitSet groupSet = ImmutableBitSet.range(outputRel.getRowType().getFieldList().size());
+        outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+              outputRel, false, groupSet, null, new ArrayList<AggregateCall>());
+        RowResolver groupByOutputRowResolver = new RowResolver();
+        for (int i = 0; i < out_rwsch.getColumnInfos().size(); i++) {
+          ColumnInfo colInfo = out_rwsch.getColumnInfos().get(i);
+          ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(),
+              colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+          groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
+        }
+        relToHiveColNameCalcitePosMap.put(outputRel,
+            buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel));
+        this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
+      }
+
+      return outputRel;
     }
 
     private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2dcb6d6..0845bc9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3756,7 +3756,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
    * a,b,c.
    */
-  static List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) {
+  List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
     if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
       ASTNode selectExprs = parseInfo.getSelForClause(dest);
       List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
@@ -3774,6 +3774,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            * If this is handled by Windowing then ignore it.
            */
           if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
+            if (!isCBOExecuted()) {
+              throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing"
+                      + " functions when CBO is off");
+            }
             continue;
           }
           result.add(grpbyExpr);
@@ -4161,6 +4165,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
           output, outerLV);
     }
+
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
     }
@@ -5852,7 +5857,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return groupByOperatorInfo2;
   }
 
-  private boolean optimizeMapAggrGroupBy(String dest, QB qb) {
+  private boolean optimizeMapAggrGroupBy(String dest, QB qb) throws SemanticException {
     List<ASTNode> grpByExprs = getGroupByForClause(qb.getParseInfo(), dest);
     if ((grpByExprs != null) && !grpByExprs.isEmpty()) {
       return false;

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
new file mode 100644
index 0000000..39fe474
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
@@ -0,0 +1,20 @@
+drop table over10k;
+
+create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+select count(distinct last_value(i) over ( partition by si order by i )) from over10k;

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
new file mode 100644
index 0000000..f07dc18
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
@@ -0,0 +1,22 @@
+drop table over10k;
+
+create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+select distinct last_value(i) over ( partition by si order by i ),
+       distinct first_value(t)  over ( partition by si order by i )
+from over10k ;

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/queries/clientpositive/distinct_windowing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing.q b/ql/src/test/queries/clientpositive/distinct_windowing.q
new file mode 100644
index 0000000..f6a00f0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing.q
@@ -0,0 +1,39 @@
+drop table over10k;
+
+create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;
+
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
new file mode 100644
index 0000000..18cf0c9
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+FAILED: SemanticException [Error 10002]: Line 3:68 Invalid column reference 'i': (possible column names are: t, si, i, b, f, d, bo, s, ts, dec, bin)

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
new file mode 100644
index 0000000..e370819
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+FAILED: ParseException line 4:7 cannot recognize input near 'distinct' 'first_value' '(' in selection target

http://git-wip-us.apache.org/repos/asf/hive/blob/586c3044/ql/src/test/results/clientpositive/distinct_windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/distinct_windowing.q.out b/ql/src/test/results/clientpositive/distinct_windowing.q.out
new file mode 100644
index 0000000..e6cde90
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_windowing.q.out
@@ -0,0 +1,451 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint),
KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: first_value_window_0
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+            Select Operator
+              expressions: first_value_window_0 (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              Group By Operator
+                keys: _col0 (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats:
NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats:
NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k
limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from
over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+-2
+-1
+0
+1
+2
+3
+4
+6
+7
+8
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              Group By Operator
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats:
NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats:
NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536
+65537
+65538
+65539
+65540
+65541
+65542
+65543
+65544
+65545
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint),
KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+                      window function definition
+                        alias: first_value_window_1
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats:
NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              Group By Operator
+                keys: _col0 (type: int), _col1 (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: tinyint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats:
NONE
+          Limit
+            Number of rows: 50
+            Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats:
NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536	-2
+65536	2
+65536	9
+65536	12
+65536	13
+65536	18
+65536	22
+65536	23
+65536	27
+65536	37
+65536	39
+65536	42
+65536	48
+65536	55
+65536	56
+65536	58
+65536	61
+65536	69
+65536	71
+65536	73
+65536	75
+65536	78
+65536	80
+65536	83
+65536	84
+65536	88
+65536	94
+65536	104
+65536	107
+65536	108
+65536	111
+65536	114
+65536	118
+65536	119
+65536	121
+65537	4
+65537	8
+65537	9
+65537	11
+65537	18
+65537	22
+65537	25
+65537	36
+65537	51
+65537	53
+65537	54
+65537	55
+65537	56
+65537	57
+65537	59


Mime
View raw message