hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject [55/58] [abbrv] hive git commit: HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Tue, 19 Apr 2016 22:25:47 GMT
HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a776f59
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a776f59
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a776f59

Branch: refs/heads/llap
Commit: 6a776f5998b1fc41c602b135c9e1ef04171f4b74
Parents: b30fe72
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Fri Apr 15 12:59:39 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Apr 15 13:24:50 2016 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   5 +
 .../queries/clientpositive/windowing_gby2.q     |  41 ++
 .../results/clientpositive/windowing_gby2.q.out | 652 +++++++++++++++++++
 3 files changed, 698 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d3e7040..329c617 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -566,7 +566,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       assert (expressionTree.getChildCount() != 0);
       if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
           == HiveParser.TOK_WINDOWSPEC) {
+        // If it is a windowing spec, we include it in the list
+        // Further, we will examine its children AST nodes to check whether
+        // there are aggregation functions within
         wdwFns.add(expressionTree);
+        doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
+                aggregations, wdwFns);
         return;
       }
       if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/queries/clientpositive/windowing_gby2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_gby2.q b/ql/src/test/queries/clientpositive/windowing_gby2.q
new file mode 100644
index 0000000..920f723
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/windowing_gby2.q
@@ -0,0 +1,41 @@
+set hive.mapred.mode=nonstrict;
+
+explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;
+
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;

http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/results/clientpositive/windowing_gby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_gby2.q.out b/ql/src/test/results/clientpositive/windowing_gby2.q.out
new file mode 100644
index 0000000..4bd6994
--- /dev/null
+++ b/ql/src/test/results/clientpositive/windowing_gby2.q.out
@@ -0,0 +1,652 @@
+PREHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), c_int (type: int)
+              outputColumnNames: key, c_int
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+              Group By Operator
+                aggregations: sum(c_int)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col1
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col1
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1
+2
+2
+2
+5
+5
+7
+PREHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type:
int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+              Group By Operator
+                aggregations: min(_col1), sum(_col2)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col1 (type: string), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string), _col2 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1
(type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: string, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: avg_window_0
+                        arguments: _col0
+                        name: avg
+                        window function: GenericUDAFAverageEvaluatorDouble
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: avg_window_0 (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+NULL
+1.0
+2.0
+3.0
+PREHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t3
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToFloat(c_int) -
c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int
(type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+              Group By Operator
+                aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col2 (type: double), _col3 (type: double), _col4 (type:
int), _col5 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col2 (type: double)
+              sort order: +-
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: _col1 (type: string), _col3 (type: double), _col4 (type:
int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string, _col1: string, _col2: double, _col3: double,
_col4: int, _col5: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 DESC NULLS LAST
+                  partition by: _col0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col2
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int),
_col5 (type: double), rank_window_0 (type: int)
+              outputColumnNames: _col1, _col3, _col4, _col5, rank_window_0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: lower(_col1) (type: string), _col3 (type: double)
+              sort order: ++
+              Map-reduce partition columns: lower(_col1) (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type:
int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1
(type: double), VALUE._col4 (type: int), VALUE._col5 (type: double)
+          outputColumnNames: _col0, _col2, _col4, _col5, _col6
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6:
double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col4 ASC NULLS FIRST
+                  partition by: lower(_col2)
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: dense_rank_window_1
+                        arguments: _col4
+                        name: dense_rank
+                        window function: GenericUDAFDenseRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col5 (type: int), _col6 (type: double), dense_rank_window_1
(type: int)
+              outputColumnNames: _col0, _col5, _col6, dense_rank_window_1
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col5 (type: int), _col6 (type: double)
+              sort order: ++
+              Map-reduce partition columns: _col5 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: dense_rank_window_1 (type: int), _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0
(type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int, _col6: int, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col7 ASC NULLS FIRST
+                  partition by: _col6
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: percent_rank_window_2
+                        arguments: _col7
+                        name: percent_rank
+                        window function: GenericUDAFPercentRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type:
double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+PREHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+              Select Operator
+                expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col1 (type: int), _col2 (type: boolean)
+          TableScan
+            alias: wr
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: cstring1 is not null (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+              Select Operator
+                expressions: cint (type: int), cstring1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col1, _col2, _col3
+          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats:
NONE
+          Select Operator
+            expressions: _col2 (type: boolean), _col3 (type: int), _col1 (type: int)
+            outputColumnNames: _col2, _col3, _col1
+            Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats:
NONE
+            Group By Operator
+              aggregations: sum(_col3), sum(_col1)
+              keys: _col2 (type: boolean)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column
stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: boolean)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: boolean)
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column
stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: boolean)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats:
NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats:
NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type:
double)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column
stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats:
NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats:
NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column
stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####


Mime
View raw message