hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [18/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part 1: No Custom Window Framing -- Default Only) (Matt McCline, reviewed by Ashutosh Chauhan)
Date Thu, 20 Jul 2017 10:16:44 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
new file mode 100644
index 0000000..c678130
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
@@ -0,0 +1,1036 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+	   ts timestamp, 
+           `dec` decimal(4,2),  
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+	   ts timestamp, 
+           `dec` decimal(4,2),  
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain vectorization detail
+select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: [2, 1]
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        partitionColumns: [2]
+                        valueColumns: []
+                    Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
+                Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col1
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: []
+                      orderExpressions: [col 1]
+                      outputColumns: [2, 1, 0]
+                      outputTypes: [int, string, string]
+                      partitionExpressions: [col 0]
+                      streamingColumns: [2]
+                  Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), rank_window_0 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 2]
+                    Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain vectorization detail
+select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 4
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats:
COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe
for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: No PTF TopN IS false
+                    Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column
stats: COMPLETE
+                    TopN Hash Memory Usage: 0.8
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string,
p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type:
string)
+                outputColumnNames: _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
+                Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats:
COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col1
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: []
+                      orderExpressions: [col 1]
+                      outputColumns: [2, 1, 0]
+                      outputTypes: [int, string, string]
+                      partitionExpressions: [col 0]
+                      streamingColumns: [2]
+                  Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColLessLongScalar(col 2, val 4) ->
boolean
+                    predicate: (rank_window_0 < 4) (type: boolean)
+                    Statistics: Num rows: 8 Data size: 3896 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: _col2 (type: string), rank_window_0 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 2]
+                      Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column
stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column
stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+a.p_mfgr	a.r
+Manufacturer#1	1
+Manufacturer#1	1
+Manufacturer#1	3
+Manufacturer#2	1
+Manufacturer#2	2
+Manufacturer#2	3
+Manufacturer#3	1
+Manufacturer#3	2
+Manufacturer#3	3
+Manufacturer#4	1
+Manufacturer#4	2
+Manufacturer#4	3
+Manufacturer#5	1
+Manufacturer#5	2
+Manufacturer#5	3
+PREHOOK: query: select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a 
+where r < 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+a.p_mfgr	a.r
+Manufacturer#1	1
+Manufacturer#1	1
+Manufacturer#2	1
+Manufacturer#3	1
+Manufacturer#4	1
+Manufacturer#5	1
+PREHOOK: query: explain vectorization detail
+select * 
+from (select t, f, rank() over(partition by t order by f) r from over10k) a 
+where r < 6 and t < 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * 
+from (select t, f, rank() over(partition by t order by f) r from over10k) a 
+where r < 6 and t < 5
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: over10k
+                  Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column
stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColLessLongScalar(col 0, val 5) ->
boolean
+                    predicate: (t < 5) (type: boolean)
+                    Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column
stats: NONE
+                    Reduce Output Operator
+                      key expressions: t (type: tinyint), f (type: float)
+                      sort order: ++
+                      Map-reduce partition columns: t (type: tinyint)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe
for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No PTF TopN IS false
+                      Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE
Column stats: NONE
+                      TopN Hash Memory Usage: 0.8
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 11
+                    includeColumns: [0, 4]
+                    dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double,
bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:float
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type:
float)
+                outputColumnNames: _col0, _col4
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
+                Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column
stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: tinyint, _col4: float
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col4 ASC NULLS FIRST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col4
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [0, 1]
+                      native: true
+                      nonKeyInputColumns: []
+                      orderExpressions: [col 1]
+                      outputColumns: [2, 0, 1]
+                      outputTypes: [int, tinyint, float]
+                      partitionExpressions: [col 0]
+                      streamingColumns: [2]
+                  Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColLessLongScalar(col 2, val 6) ->
boolean
+                    predicate: (rank_window_0 < 6) (type: boolean)
+                    Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: tinyint), _col4 (type: float), rank_window_0
(type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
+                      Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE
Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE
Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * 
+from (select t, f, rank() over(partition by t order by f) r from over10k) a 
+where r < 6 and t < 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+from (select t, f, rank() over(partition by t order by f) r from over10k) a 
+where r < 6 and t < 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+a.t	a.f	a.r
+-3	0.56	1
+-3	0.83	2
+-3	2.26	3
+-3	2.48	4
+-3	3.82	5
+-2	1.55	1
+-2	1.65	2
+-2	1.79	3
+-2	4.06	4
+-2	4.4	5
+-1	0.79	1
+-1	0.95	2
+-1	1.27	3
+-1	1.49	4
+-1	2.8	5
+0	0.08	1
+0	0.94	2
+0	1.44	3
+0	2.0	4
+0	2.12	5
+1	0.13	1
+1	0.44	2
+1	1.04	3
+1	3.41	4
+1	3.45	5
+2	2.21	1
+2	3.1	2
+2	9.93	3
+2	11.43	4
+2	15.45	5
+3	0.12	1
+3	0.19	2
+3	7.14	3
+3	7.97	4
+3	8.95	5
+4	2.26	1
+4	5.51	2
+4	5.53	3
+4	5.76	4
+4	7.26	5
+PREHOOK: query: select *
+from (select t, f, row_number() over(partition by t order by f) r from over10k) a
+where r < 8 and t < 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from (select t, f, row_number() over(partition by t order by f) r from over10k) a
+where r < 8 and t < 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+a.t	a.f	a.r
+-3	0.56	1
+-3	0.83	2
+-3	2.26	3
+-3	2.48	4
+-3	3.82	5
+-3	6.8	6
+-3	6.83	7
+-2	1.55	1
+-2	1.65	2
+-2	1.79	3
+-2	4.06	4
+-2	4.4	5
+-2	5.43	6
+-2	5.59	7
+-1	0.79	1
+-1	0.95	2
+-1	1.27	3
+-1	1.49	4
+-1	2.8	5
+-1	4.08	6
+-1	4.31	7
+PREHOOK: query: explain vectorization detail
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: ctinyint (type: tinyint), cdouble (type: double)
+                    sort order: ++
+                    Map-reduce partition columns: ctinyint (type: tinyint)
+                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column
stats: COMPLETE
+                    TopN Hash Memory Usage: 0.8
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type:
double)
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column
stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: tinyint, _col5: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col5 ASC NULLS FIRST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col5
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (rank_window_0 < 5) (type: boolean)
+                    Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0
(type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column
stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE
Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table if exists sB
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists sB
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS
TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sB
+POSTHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS
TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sB
+POSTHOOK: Lineage: sb.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), ]
+POSTHOOK: Lineage: sb.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint,
type:tinyint, comment:null), ]
+POSTHOOK: Lineage: sb.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null
 ), ]
+a.ctinyint	a.cdouble	a.r
+PREHOOK: query: select * from sB
+where ctinyint is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from sB
+where ctinyint is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sb
+#### A masked pattern was here ####
+sb.ctinyint	sb.cdouble	sb.r
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+PREHOOK: query: drop table if exists sD
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists sD
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: explain vectorization detail
+create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain vectorization detail
+create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column
stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Reduce Output Operator
+                    key expressions: ctinyint (type: tinyint), cdouble (type: double)
+                    sort order: ++
+                    Map-reduce partition columns: ctinyint (type: tinyint)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe
for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: No PTF TopN IS false
+                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column
stats: COMPLETE
+                    TopN Hash Memory Usage: 0.8
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint,
cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp,
cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type:
double)
+                outputColumnNames: _col0, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
+                Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column
stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: tinyint, _col5: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col5 ASC NULLS FIRST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col5
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1]
+                      functionNames: [rank]
+                      keyInputColumns: [0, 1]
+                      native: true
+                      nonKeyInputColumns: []
+                      orderExpressions: [col 1]
+                      outputColumns: [2, 0, 1]
+                      outputTypes: [int, tinyint, double]
+                      partitionExpressions: [col 0]
+                      streamingColumns: [2]
+                  Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColLessLongScalar(col 2, val 5) ->
boolean
+                    predicate: (rank_window_0 < 5) (type: boolean)
+                    Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0
(type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
+                      Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column
stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE
Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.sD
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: ctinyint tinyint, cdouble double, r int
+          field delimiter: ,
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.sD
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS
TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sD
+POSTHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  STORED AS
TEXTFILE as  
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble)
r from  alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sD
+POSTHOOK: Lineage: sd.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), ]
+POSTHOOK: Lineage: sd.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint,
type:tinyint, comment:null), ]
+POSTHOOK: Lineage: sd.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null
 ), ]
+a.ctinyint	a.cdouble	a.r
+PREHOOK: query: select * from sD
+where ctinyint is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sd
+#### A masked pattern was here ####
+POSTHOOK: query: select * from sD
+where ctinyint is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sd
+#### A masked pattern was here ####
+sd.ctinyint	sd.cdouble	sd.r
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1
+NULL	NULL	1


Mime
View raw message