hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [24/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part 1: No Custom Window Framing -- Default Only) (Matt McCline, reviewed by Ashutosh Chauhan)
Date Thu, 20 Jul 2017 10:16:50 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
new file mode 100644
index 0000000..0514c2e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
@@ -0,0 +1,314 @@
+PREHOOK: query: explain vectorization detail
+       select rank() over (order by return_ratio) as return_rank from
+       (select sum(wr.cint)/sum(ws.c_int)  as return_ratio
+                 from cbo_t3  ws join alltypesorc wr on ws.value = wr.cstring1
+                  group by ws.c_boolean ) in_web
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+       select rank() over (order by return_ratio) as return_rank from
+       (select sum(wr.cint)/sum(ws.c_int)  as return_ratio
+                 from cbo_t3  ws join alltypesorc wr on ws.value = wr.cstring1
+                  group by ws.c_boolean ) in_web
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ws
+                  Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 2, 4]
+                      Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: [1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [2, 4]
+                        Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [1, 2, 4]
+                    dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+                    partitionColumnCount: 0
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: wr
+                  Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 6) -> boolean
+                    predicate: cstring1 is not null (type: boolean)
+                    Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cint (type: int), cstring1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 6]
+                      Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: [6]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [2]
+                        Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [2, 6]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 36 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col3), sum(_col1)
+                  Group By Vectorization:
+                      groupByMode: HASH
+                      vectorOutput: false
+                      native: false
+                      vectorProcessingMode: NONE
+                      projectedOutputColumns: null
+                  keys: _col2 (type: boolean)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: boolean)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: boolean)
+                    Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:boolean, VALUE._col0:bigint, VALUE._col1:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1]
+                keys: KEY._col0 (type: boolean)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col2 (type: bigint)
+                  outputColumnNames: _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1, 2]
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+                    sort order: ++
+                    Map-reduce partition columns: 0 (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: [3, 6]
+                        keyExpressions: ConstantVectorExpression(val 0) -> 3:long, DoubleColDivideDoubleColumn(col 4, col 5)(children: CastLongToDouble(col 1) -> 4:double, CastLongToDouble(col 2) -> 5:double) -> 6:double
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        partitionColumns: [7]
+                        valueColumns: [1, 2]
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:double, VALUE._col1:bigint, VALUE._col2:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, bigint, double, double, double, double
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+                outputColumnNames: _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 3]
+                Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: bigint, _col2: bigint
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+                        partition by: 0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 9:double]
+                      functionNames: [rank]
+                      keyInputColumns: []
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 8:double]
+                      outputColumns: [4, 2, 3]
+                      outputTypes: [int, bigint, bigint]
+                      partitionExpressions: [ConstantVectorExpression(val 0) -> 5:long]
+                      streamingColumns: [4]
+                  Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: rank_window_0 (type: int)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by return_ratio) as return_rank from
+       (select sum(wr.cint)/sum(ws.c_int)  as return_ratio
+                 from cbo_t3  ws join alltypesorc wr on ws.value = wr.cstring1
+                  group by ws.c_boolean ) in_web
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by return_ratio) as return_rank from
+       (select sum(wr.cint)/sum(ws.c_int)  as return_ratio
+                 from cbo_t3  ws join alltypesorc wr on ws.value = wr.cstring1
+                  group by ws.c_boolean ) in_web
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank

http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
new file mode 100644
index 0000000..72e664f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
@@ -0,0 +1,1158 @@
+PREHOOK: query: explain vectorization detail
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ws
+                  Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: key (type: string), c_int (type: int)
+                    outputColumnNames: key, c_int
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 2]
+                    Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(c_int)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 2) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          vectorOutput: true
+                          keyExpressions: col 0
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumns: [0]
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [1]
+                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 2]
+                    dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:string, VALUE._col0:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0]
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1]
+                  Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: 0 (type: int), _col1 (type: bigint)
+                    sort order: ++
+                    Map-reduce partition columns: 0 (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: [2, 1]
+                        keyExpressions: ConstantVectorExpression(val 0) -> 2:long
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        partitionColumns: [3]
+                        valueColumns: []
+                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, bigint
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: bigint)
+                outputColumnNames: _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1]
+                Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: bigint
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: 0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col1
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [1]
+                      native: true
+                      nonKeyInputColumns: []
+                      orderExpressions: [col 1]
+                      outputColumns: [2, 1]
+                      outputTypes: [int, bigint]
+                      partitionExpressions: [ConstantVectorExpression(val 0) -> 3:long]
+                      streamingColumns: [2]
+                  Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: rank_window_0 (type: int)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
+                    Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank
+1
+2
+2
+2
+5
+5
+7
+PREHOOK: query: explain vectorization detail
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ws
+                  Statistics: Num rows: 20 Data size: 3306 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [5, 1, 2]
+                        selectExpressions: CastStringToLong(col 0) -> 5:int
+                    Statistics: Num rows: 20 Data size: 3306 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: min(_col1), sum(_col2)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFSumLong(col 2) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          vectorOutput: true
+                          keyExpressions: col 5
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumns: [0, 1]
+                      keys: _col0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [1, 2]
+                        Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2]
+                    dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:int, VALUE._col0:string, VALUE._col1:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), sum(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFSumLong(col 2) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1]
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string), _col2 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col1 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [1, 2]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumns: [1]
+                      valueColumns: [0]
+                  Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: int)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:bigint, VALUE._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 0, 1]
+                Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: int, _col1: string, _col2: bigint
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 ASC NULLS FIRST
+                        partition by: _col1
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: avg_window_0
+                              arguments: _col0
+                              name: avg
+                              window function: GenericUDAFAverageEvaluatorDouble
+                              window frame: RANGE PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorLongAvg]
+                      functionInputExpressions: [col 2]
+                      functionNames: [avg]
+                      keyInputColumns: [0, 1]
+                      native: true
+                      nonKeyInputColumns: [2]
+                      orderExpressions: [col 1]
+                      outputColumns: [3, 2, 0, 1]
+                      outputTypes: [double, int, string, bigint]
+                      partitionExpressions: [col 0]
+                      streamingColumns: []
+                  Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: avg_window_0 (type: double)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank
+NULL
+1.0
+2.0
+3.0
+PREHOOK: query: explain vectorization detail
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: cbo_t3
+                  Statistics: Num rows: 20 Data size: 3382 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 6, 7, 2, 9]
+                        selectExpressions: DoubleColSubtractDoubleColumn(col 5, col 3)(children: CastLongToFloatViaLongToDouble(col 2) -> 5:double) -> 6:double, DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 7:double, DoubleColSubtractDoubleColumn(col 8, col 5)(children: DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 8:double, CastLongToDouble(col 2) -> 5:double) -> 9:double
+                    Statistics: Num rows: 20 Data size: 3382 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumDouble(col 6) -> double, VectorUDAFSumDouble(col 7) -> double, VectorUDAFMaxLong(col 2) -> int, VectorUDAFSumDouble(col 9) -> double
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          vectorOutput: true
+                          keyExpressions: col 0, col 1
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumns: [0, 1, 2, 3]
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: [0, 1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [2, 3, 4, 5]
+                        Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3]
+                    dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, double, double, double, double
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:double, VALUE._col1:double, VALUE._col2:int, VALUE._col3:double
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDouble(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFMaxLong(col 4) -> int, VectorUDAFSumDouble(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0, col 1
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1, 2, 3]
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col2 (type: double)
+                  sort order: +-
+                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0, 2]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumns: [0]
+                      valueColumns: [1, 3, 4, 5]
+                  Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: az
+                reduceColumnSortOrder: +-
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col0:string, VALUE._col1:double, VALUE._col2:int, VALUE._col3:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, string, string
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 2, 1, 3, 4, 5]
+                Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: string, _col1: string, _col2: double, _col3: double, _col4: int, _col5: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 DESC NULLS LAST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col2
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [0, 1]
+                      native: true
+                      nonKeyInputColumns: [2, 3, 4, 5]
+                      orderExpressions: [col 1]
+                      outputColumns: [6, 0, 2, 1, 3, 4, 5]
+                      outputTypes: [int, string, string, double, double, int, double]
+                      partitionExpressions: [col 0]
+                      streamingColumns: [6]
+                  Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: rank_window_0 (type: int), _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+                    outputColumnNames: rank_window_0, _col1, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [6, 2, 3, 4, 5]
+                    Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: lower(_col1) (type: string), _col3 (type: double)
+                      sort order: ++
+                      Map-reduce partition columns: lower(_col1) (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumns: [7, 3]
+                          keyExpressions: StringLower(col 2) -> 7:String
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          partitionColumns: [8]
+                          valueColumns: [6, 2, 4, 5]
+                      Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double)
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col0:int, VALUE._col2:string, VALUE._col4:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, string
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: double)
+                outputColumnNames: _col0, _col2, _col4, _col5, _col6
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 3, 1, 4, 5]
+                Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col4 ASC NULLS FIRST
+                        partition by: lower(_col2)
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: dense_rank_window_1
+                              arguments: _col4
+                              name: dense_rank
+                              window function: GenericUDAFDenseRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorDenseRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [dense_rank]
+                      keyInputColumns: [1]
+                      native: true
+                      nonKeyInputColumns: [2, 3, 4, 5]
+                      orderExpressions: [col 1]
+                      outputColumns: [6, 2, 3, 1, 4, 5]
+                      outputTypes: [int, int, string, double, int, double]
+                      partitionExpressions: [StringLower(col 3) -> 7:String]
+                      streamingColumns: [6]
+                  Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: dense_rank_window_1 (type: int), _col0 (type: int), _col5 (type: int), _col6 (type: double)
+                    outputColumnNames: dense_rank_window_1, _col0, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [6, 2, 4, 5]
+                    Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col5 (type: int), _col6 (type: double)
+                      sort order: ++
+                      Map-reduce partition columns: _col5 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumns: [4, 5]
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          partitionColumns: [4]
+                          valueColumns: [6, 2]
+                      Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: dense_rank_window_1 (type: int), _col0 (type: int)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: percent_rank not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+                outputColumnNames: _col0, _col1, _col6, _col7
+                Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: int, _col1: int, _col6: int, _col7: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col7 ASC NULLS FIRST
+                        partition by: _col6
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: percent_rank_window_2
+                              arguments: _col7
+                              name: percent_rank
+                              window function: GenericUDAFPercentRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+_c0	_c1	_c2
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+PREHOOK: query: explain vectorization detail
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ws
+                  Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 2, 4]
+                      Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: [1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [2, 4]
+                        Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [1, 2, 4]
+                    dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+                    partitionColumnCount: 0
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: wr
+                  Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 6) -> boolean
+                    predicate: cstring1 is not null (type: boolean)
+                    Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cint (type: int), cstring1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 6]
+                      Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: [6]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [2]
+                        Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [2, 6]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 36 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col3), sum(_col1)
+                  Group By Vectorization:
+                      groupByMode: HASH
+                      vectorOutput: false
+                      native: false
+                      vectorProcessingMode: NONE
+                      projectedOutputColumns: null
+                  keys: _col2 (type: boolean)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: boolean)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: boolean)
+                    Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:boolean, VALUE._col0:bigint, VALUE._col1:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1]
+                keys: KEY._col0 (type: boolean)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col2 (type: bigint)
+                  outputColumnNames: _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1, 2]
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+                    sort order: ++
+                    Map-reduce partition columns: 0 (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: [3, 6]
+                        keyExpressions: ConstantVectorExpression(val 0) -> 3:long, DoubleColDivideDoubleColumn(col 4, col 5)(children: CastLongToDouble(col 1) -> 4:double, CastLongToDouble(col 2) -> 5:double) -> 6:double
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        partitionColumns: [7]
+                        valueColumns: [1, 2]
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:double, VALUE._col1:bigint, VALUE._col2:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, bigint, double, double, double, double
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+                outputColumnNames: _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 3]
+                Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: bigint, _col2: bigint
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+                        partition by: 0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 9:double]
+                      functionNames: [rank]
+                      keyInputColumns: []
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 8:double]
+                      outputColumns: [4, 2, 3]
+                      outputTypes: [int, bigint, bigint]
+                      partitionExpressions: [ConstantVectorExpression(val 0) -> 5:long]
+                      streamingColumns: [4]
+                  Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: rank_window_0 (type: int)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank


Mime
View raw message