hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [39/51] [partial] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
Date Wed, 08 Aug 2018 07:38:02 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query26.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
index 17bbc6a..a3fe272 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select  i_item_id, 
         avg(cs_quantity) agg1,
         avg(cs_list_price) agg2,
@@ -18,7 +18,7 @@ select  i_item_id,
  order by i_item_id
  limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select  i_item_id, 
         avg(cs_quantity) agg1,
         avg(cs_list_price) agg2,
@@ -38,6 +38,10 @@ select  i_item_id,
  order by i_item_id
  limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -54,18 +58,40 @@ STAGE PLANS:
                   alias: promotion
                   filterExpr: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk
is not null) (type: boolean)
                   Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column
stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children:
FilterStringGroupColEqualStringScalar(col 9:string, val N), FilterStringGroupColEqualStringScalar(col
14:string, val N)), SelectColumnIsNotNull(col 0:int))
                     predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and
p_promo_sk is not null) (type: boolean)
                     Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column
stats: NONE
                     Select Operator
                       expressions: p_promo_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE
Column stats: NONE
                       Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
                         keys:
                           0 _col3 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -85,79 +111,176 @@ STAGE PLANS:
                   alias: catalog_sales
                   filterExpr: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null
and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean)
                   Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col
4:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col
16:int))
                     predicate: (cs_bill_cdemo_sk is not null and cs_item_sk is not null and
cs_promo_sk is not null and cs_sold_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int),
cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type:
decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 4, 15, 16, 18, 20, 21, 27]
                       Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats:
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats:
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col2 (type: int), _col3 (type:
int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type:
decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 10 
             Map Operator Tree:
                 TableScan
                   alias: item
                   filterExpr: i_item_sk is not null (type: boolean)
                   Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: i_item_sk is not null (type: boolean)
                     Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: i_item_sk (type: int), i_item_id (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                         value expressions: _col1 (type: string)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7 
             Map Operator Tree:
                 TableScan
                   alias: customer_demographics
                   filterExpr: ((cd_gender = 'F') and (cd_marital_status = 'W') and (cd_education_status
= 'Primary') and cd_demo_sk is not null) (type: boolean)
                   Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col
1:string, val F), FilterStringGroupColEqualStringScalar(col 2:string, val W), FilterStringGroupColEqualStringScalar(col
3:string, val Primary), SelectColumnIsNotNull(col 0:int))
                     predicate: ((cd_education_status = 'Primary') and (cd_gender = 'F') and
(cd_marital_status = 'W') and cd_demo_sk is not null) (type: boolean)
                     Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: cd_demo_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 8 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
                   filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column
stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col
6:int, val 1998), SelectColumnIsNotNull(col 0:int))
                     predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -176,6 +299,11 @@ STAGE PLANS:
         Reducer 3 
             Local Work:
               Map Reduce Local Work
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -202,6 +330,11 @@ STAGE PLANS:
                     Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE
Column stats: NONE
                     value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6
(type: decimal(7,2)), _col7 (type: decimal(7,2))
         Reducer 4 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -226,9 +359,23 @@ STAGE PLANS:
                     value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3
(type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint),
_col7 (type: decimal(17,2)), _col8 (type: bigint)
         Reducer 5 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3),
sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col
2:bigint) -> bigint, VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
6:bigint) -> bigint, VectorUDAFSumDecimal(col 7:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
8:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8
@@ -236,25 +383,50 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), (_col1 / _col2) (type: double), (_col3
/ _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), (_col7 / _col8) (type:
decimal(37,22))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 9, 11, 12, 13]
+                      selectExpressions: LongColDivideLongColumn(col 1:bigint, col 2:bigint)
-> 9:double, DecimalColDivideDecimalColumn(col 3:decimal(17,2), col 10:decimal(19,0))(children:
CastLongToDecimal(col 4:bigint) -> 10:decimal(19,0)) -> 11:decimal(37,22), DecimalColDivideDecimalColumn(col
5:decimal(17,2), col 10:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 10:decimal(19,0))
-> 12:decimal(37,22), DecimalColDivideDecimalColumn(col 7:decimal(17,2), col 10:decimal(19,0))(children:
CastLongToDecimal(col 8:bigint) -> 10:decimal(19,0)) -> 13:decimal(37,22)
                   Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE
Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE
Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col1 (type: double), _col2 (type: decimal(37,22)),
_col3 (type: decimal(37,22)), _col4 (type: decimal(37,22))
         Reducer 6 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double),
VALUE._col1 (type: decimal(37,22)), VALUE._col2 (type: decimal(37,22)), VALUE._col3 (type:
decimal(37,22))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4]
                 Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE
Column stats: NONE
                 Limit
                   Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column
stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column
stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
index 294222e..82b4dbf 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select  i_item_id,
         s_state, grouping(s_state) g_state,
         avg(ss_quantity) agg1,
@@ -20,7 +20,7 @@ select  i_item_id,
          ,s_state
  limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select  i_item_id,
         s_state, grouping(s_state) g_state,
         avg(ss_quantity) agg1,
@@ -42,6 +42,10 @@ select  i_item_id,
          ,s_state
  limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -58,18 +62,40 @@ STAGE PLANS:
                   alias: store
                   filterExpr: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk
is not null) (type: boolean)
                   Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column
stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col
24, values SD, FL, MI, LA, MO, SC), SelectColumnIsNotNull(col 0:int))
                     predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk
is not null) (type: boolean)
                     Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column
stats: NONE
                     Select Operator
                       expressions: s_store_sk (type: int), s_state (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 24]
                       Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE
Column stats: NONE
                       Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
                         keys:
                           0 _col3 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -89,79 +115,176 @@ STAGE PLANS:
                   alias: store_sales
                   filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and
ss_store_sk is not null and ss_item_sk is not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col
4:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col
2:int))
                     predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk
is not null and ss_store_sk is not null) (type: boolean)
                     Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk
(type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)),
ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 2, 4, 7, 10, 12, 13, 19]
                       Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats:
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col2 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats:
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: int), _col3 (type:
int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type:
decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 10 
             Map Operator Tree:
                 TableScan
                   alias: item
                   filterExpr: i_item_sk is not null (type: boolean)
                   Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: i_item_sk is not null (type: boolean)
                     Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: i_item_sk (type: int), i_item_id (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE
Column stats: NONE
                         value expressions: _col1 (type: string)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7 
             Map Operator Tree:
                 TableScan
                   alias: customer_demographics
                   filterExpr: ((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status
= '2 yr Degree') and cd_demo_sk is not null) (type: boolean)
                   Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col
1:string, val M), FilterStringGroupColEqualStringScalar(col 2:string, val U), FilterStringGroupColEqualStringScalar(col
3:string, val 2 yr Degree), SelectColumnIsNotNull(col 0:int))
                     predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M')
and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean)
                     Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: cd_demo_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE
Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 8 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
                   filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column
stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col
6:int, val 2001), SelectColumnIsNotNull(col 0:int))
                     predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE
Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -180,6 +303,11 @@ STAGE PLANS:
         Reducer 3 
             Local Work:
               Map Reduce Local Work
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -206,6 +334,11 @@ STAGE PLANS:
                     Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE
Column stats: NONE
                     value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6
(type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string)
         Reducer 4 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -234,9 +367,23 @@ STAGE PLANS:
                       value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5
(type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint),
_col9 (type: decimal(17,2)), _col10 (type: bigint)
         Reducer 5 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3),
sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCountMerge(col
4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
6:bigint) -> bigint, VectorUDAFSumDecimal(col 7:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
8:bigint) -> bigint, VectorUDAFSumDecimal(col 9:decimal(17,2)) -> decimal(17,2), VectorUDAFCountMerge(col
10:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type:
bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10
@@ -244,25 +391,50 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), grouping(_col2,
0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: decimal(37,22)),
(_col7 / _col8) (type: decimal(37,22)), (_col9 / _col10) (type: decimal(37,22))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 11, 12, 14, 15, 16]
+                      selectExpressions: VectorUDFAdaptor(grouping(_col2, 0)) -> 11:bigint,
LongColDivideLongColumn(col 3:bigint, col 4:bigint) -> 12:double, DecimalColDivideDecimalColumn(col
5:decimal(17,2), col 13:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 13:decimal(19,0))
-> 14:decimal(37,22), DecimalColDivideDecimalColumn(col 7:decimal(17,2), col 13:decimal(19,0))(children:
CastLongToDecimal(col 8:bigint) -> 13:decimal(19,0)) -> 15:decimal(37,22), DecimalColDivideDecimalColumn(col
9:decimal(17,2), col 13:decimal(19,0))(children: CastLongToDecimal(col 10:bigint) -> 13:decimal(19,0))
-> 16:decimal(37,22)
                   Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE
Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string)
                     sort order: ++
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats:
COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col2 (type: bigint), _col3 (type: double), _col4
(type: decimal(37,22)), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22))
         Reducer 6 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type:
string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: decimal(37,22)),
VALUE._col3 (type: decimal(37,22)), VALUE._col4 (type: decimal(37,22))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
                 Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE
Column stats: NONE
                 Limit
                   Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column
stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column
stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat


Mime
View raw message