hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [10/34] hive git commit: HIVE-16589: Vectorization: Support Complex Types and GroupBy modes PARTIAL2, FINAL, and COMPLETE for AVG, VARIANCE (Matt McCline, reviewed by Jason Dere)
Date Thu, 22 Jun 2017 23:40:46 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index e731c2d..d5235aa 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION 
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT   cstring1,
          cdouble,
          ctimestamp1,
@@ -18,7 +18,7 @@ WHERE    ((cstring2 LIKE '%b%')
               OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION 
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT   cstring1,
          cdouble,
          ctimestamp1,
@@ -58,15 +58,35 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
                     Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                       outputColumnNames: cdouble, cstring1, ctimestamp1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 6, 8]
                       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMinDouble(col 5) -> double
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            keyExpressions: col 5, col 6, col 8
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2]
                         keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -75,26 +95,56 @@ STAGE PLANS:
                           key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                           sort order: +++
                           Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              keyColumns: [0, 1, 2]
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              partitionColumns: [0, 1, 2]
+                              valueColumns: [3, 4, 5]
                           Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                groupByVectorOutput: false
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [5, 6, 7, 8]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
         Reducer 2 
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col1] not supported
-                vectorized: false
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFStdSampFinal(col 4) -> double, VectorUDAFMinDouble(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0, col 1, col 2
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1, 2]
                 keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -102,9 +152,17 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
+                      selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6)
                   Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
index 32d1c0b..9395a01 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION 
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT   cfloat,
          cstring1,
          cint,
@@ -22,7 +22,7 @@ WHERE    (((cbigint > -23)
                   OR (cfloat = cdouble))))
 ORDER BY cbigint, cfloat
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION 
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT   cfloat,
          cstring1,
          cint,
@@ -66,16 +66,34 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean
                     predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean)
                     Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17]
+                          selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double
                       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col5 (type: bigint), _col0 (type: float)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: [3, 4]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17]
                         Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
             Execution mode: vectorized
@@ -87,22 +105,41 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 8]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double
         Reducer 2 
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
                 groupByVectorOutput: true
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 14
+                    dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13]
                 Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
index 709a75f..a3c70bb 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
@@ -1,3 +1,180 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM   alltypesorc
+WHERE  (((ctimestamp1 < ctimestamp2)
+         AND ((cstring2 LIKE 'b%')
+              AND (cfloat <= -5638.15)))
+        OR ((cdouble < ctinyint)
+            AND ((-10669 != ctimestamp2)
+                 OR (359 > cint))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM   alltypesorc
+WHERE  (((ctimestamp1 < ctimestamp2)
+         AND ((cstring2 LIKE 'b%')
+              AND (cfloat <= -5638.15)))
+        OR ((cdouble < ctinyint)
+            AND ((-10669 != ctimestamp2)
+                 OR (359 > cint))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8, col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern b%) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarNotEqualDoubleColumn(val -10669.0, col 12)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterLongScalarGreaterLongColumn(val 359, col 2) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((-10669.0 <> UDFToDouble(ctimestamp2)) or (359 > cint)))) (type: boolean)
+                    Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 3, 4, 5]
+                      Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 1) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFAvgDouble(col 5) -> struct<count:bigint,sum:double,input:double>
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct<count:bigint,sum:double,input:smallint>), _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct<count:bigint,sum:double,input:double>)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:smallint>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct<count:bigint,sum:double,input:double>
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFVarPopFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFMinLong(col 4) -> tinyint, VectorUDAFAvgFinal(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12]
+                      selectExpressions: DoubleColModuloDoubleScalar(col 0, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2) -> 8:double, DoubleColSubtractDoubleColumn(col 1, col 0) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColSubtractDoubleColumn(col 1, col 0) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15, col 1)(children: DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 15:double) -> 12:double
+                  Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT AVG(csmallint),
        (AVG(csmallint) % -563),
        (AVG(csmallint) + 762),

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
index 2398dee..a335c7d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
@@ -1,4 +1,186 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM   alltypesorc
+WHERE  (((cint <= cfloat)
+         AND ((79.553 != cbigint)
+              AND (ctimestamp2 = -29071)))
+        OR ((cbigint > cdouble)
+            AND ((79.553 <= csmallint)
+                 AND (ctimestamp1 > ctimestamp2))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM   alltypesorc
+WHERE  (((cint <= cfloat)
+         AND ((79.553 != cbigint)
+              AND (ctimestamp2 = -29071)))
+        OR ((cbigint > cdouble)
+            AND ((79.553 <= csmallint)
+                 AND (ctimestamp1 > ctimestamp2))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -29071.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 1) -> 14:decimal(8,3)) -> boolean, FilterTimestampColGreaterTimestampColumn(col 8, col 9) -> boolean) -> boolean) -> boolean
+                    predicate: (((UDFToFloat(cint) <= cfloat) and (79.553 <> CAST( cbigint AS decimal(22,3))) and (UDFToDouble(ctimestamp2) = -29071.0)) or ((UDFToDouble(cbigint) > cdouble) and (79.553 <= CAST( csmallint AS decimal(8,3))) and (ctimestamp1 > ctimestamp2))) (type: boolean)
+                    Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 4]
+                      Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdSampDouble(col 4) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFSumDouble(col 4) -> double, VectorUDAFAvgLong(col 2) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFStdPopLong(col 2) -> struct<count:bigint,sum:double,variance:double>
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct<count:bigint,sum:double,variance:double>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: double), _col4 (type: struct<count:bigint,sum:double,input:int>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 8, 9]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, decimal(22,3), decimal(8,3)
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:double, VALUE._col4:struct<count:bigint,sum:double,input:int>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFStdSampFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFStdSampFinal(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15]
+                      selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0, col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1) -> 7:double, DoubleColModuloDoubleScalar(col 0, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11)(children: DoubleColMultiplyDoubleColumn(col 0, col 10)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0) -> 11:double, DoubleColDivideDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 0, col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double, DoubleScal
 arSubtractDoubleColumn(val -3728.0, col 0) -> 12:double, DoubleColDivideDoubleColumn(col 4, col 2) -> 15:double
+                  Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
 PREHOOK: query: SELECT STDDEV_SAMP(csmallint),
        (STDDEV_SAMP(csmallint) - 10.175),
        STDDEV_POP(ctinyint),

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
index 0d6829f..3d0e700 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
@@ -1,3 +1,179 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM   alltypesorc
+WHERE  (((csmallint >= cint)
+         OR ((-89010 >= ctinyint)
+             AND (cdouble > 79.553)))
+        OR ((-563 != cbigint)
+            AND ((ctinyint != cbigint)
+                 OR (-3728 >= cdouble))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM   alltypesorc
+WHERE  (((csmallint >= cint)
+         OR ((-89010 >= ctinyint)
+             AND (cdouble > 79.553)))
+        OR ((-563 != cbigint)
+            AND ((ctinyint != cbigint)
+                 OR (-3728 >= cdouble))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterLongScalarGreaterEqualLongColumn(val -89010, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterDoubleScalar(col 5, val 79.553) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -563, col 3) -> boolean, FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val -3728.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((UDFToInteger(csmallint) >= cint) or ((-89010 >= UDFToInteger(ctinyint)) and (cdouble > 79.553)) or ((-563 <> cbigint) and ((UDFToLong(ctinyint) <> cbigint) or (-3728.0 >= cdouble)))) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double)
+                      outputColumnNames: ctinyint, cint, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 2, 5]
+                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFAvgDouble(col 5) -> struct<count:bigint,sum:double,input:double>, VectorUDAFVarPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMinLong(col 0) -> tinyint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4]
+                          Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: tinyint)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:tinyint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFMinLong(col 4) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16]
+                      selectExpressions: LongColMultiplyLongScalar(col 0, val -563) -> 5:long, LongScalarAddLongColumn(val -3728, col 0) -> 6:long, DoubleColUnaryMinus(col 1) -> 7:double, LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 9:long, DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13)(children: DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8, col 10)(children: LongScalarAddLongColumn(val -3728, col 0) -> 8:long, LongColMultiplyLongScalar(col 0, val -563) -> 10:long) -> 14:long, DoubleColMult
 iplyDoubleColumn(col 13, col 15)(children: CastLongToDouble(col 4) -> 13:double, DoubleColUnaryMinus(col 16)(children: DoubleColDivideDoubleColumn(col 15, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 15:double) -> 16:double) -> 15:double) -> 16:double
+                  Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT SUM(cint),
        (SUM(cint) * -563),
        (-3728 + SUM(cint)),

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_5.q.out b/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
index 914a626..2737d9b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
@@ -1,3 +1,174 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesorc
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesorc
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %b%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern a) -> boolean) -> boolean) -> boolean
+                    predicate: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean)
+                    Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
+                      outputColumnNames: ctinyint, csmallint, cint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
+                      Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFMaxLong(col 1) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1) -> smallint, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4]
+                          Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: VALUE._col0:smallint, VALUE._col1:bigint, VALUE._col2:smallint, VALUE._col3:bigint, VALUE._col4:tinyint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(col 0) -> smallint, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFMaxLong(col 4) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 5, 1, 9, 6, 2, 10, 7, 3, 4, 11, 14]
+                      selectExpressions: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 5:long, DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 6)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 6:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 9:double, LongScalarMultiplyLongColumn(val 6981, col 0)(children: col 0) -> 6:long, LongColUnaryMinus(col 2) -> 10:long, DoubleScalarModuloDoubleColumn(val 197.0, col 12)(children: DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 11)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 11:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 12:double) -> 7:double, LongColUnaryMinus(col 4) -> 11:long, LongColAddLongColumn(col 13, col 4)(children: LongColUnaryMinus(col 4) -> 13:long) -> 14:long
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT MAX(csmallint),
        (MAX(csmallint) * -75),
        COUNT(*),

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_6.q.out b/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
index 13897f6..4906328 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
@@ -1,3 +1,114 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cboolean1,
+       cfloat,
+       cstring1,
+       (988888 * csmallint),
+       (-(csmallint)),
+       (-(cfloat)),
+       (-26.28 / cfloat),
+       (cfloat * 359),
+       (cint % ctinyint),
+       (-(cdouble)),
+       (ctinyint - -75),
+       (762 * (cint % ctinyint))
+FROM   alltypesorc
+WHERE  ((ctinyint != 0)
+        AND ((((cboolean1 <= 0)
+          AND (cboolean2 >= cboolean1))
+          OR ((cbigint IS NOT NULL)
+              AND ((cstring2 LIKE '%a')
+                   OR (cfloat <= -257))))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cboolean1,
+       cfloat,
+       cstring1,
+       (988888 * csmallint),
+       (-(csmallint)),
+       (-(cfloat)),
+       (-26.28 / cfloat),
+       (cfloat * 359),
+       (cint % ctinyint),
+       (-(cdouble)),
+       (ctinyint - -75),
+       (762 * (cint % ctinyint))
+FROM   alltypesorc
+WHERE  ((ctinyint != 0)
+        AND ((((cboolean1 <= 0)
+          AND (cboolean2 >= cboolean1))
+          OR ((cbigint IS NOT NULL)
+              AND ((cstring2 LIKE '%a')
+                   OR (cfloat <= -257))))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10, val 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 11, col 10) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 3) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %a) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -257.0) -> boolean) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((ctinyint <> 0) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean)
+                    Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28 / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [10, 4, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21]
+                          selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1)(children: col 1) -> 12:long, LongColUnaryMinus(col 1) -> 13:long, DoubleColUnaryMinus(col 4) -> 14:double, DoubleScalarDivideDoubleColumn(val -26.28, col 4)(children: col 4) -> 15:double, DoubleColMultiplyDoubleScalar(col 4, val 359.0) -> 16:double, LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 17:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColSubtractLongScalar(col 0, val -75)(children: col 0) -> 19:long, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 20:long) -> 21:long
+                      Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, bigint, double, double, double, bigint, double, bigint, bigint, bigint
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cboolean1,
        cfloat,
        cstring1,

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_7.q.out b/ql/src/test/results/clientpositive/spark/vectorization_7.q.out
index d2ff353..d49af0c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_7.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -25,7 +25,7 @@ WHERE  ((ctinyint != 0)
 ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 25
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -96,8 +96,10 @@ STAGE PLANS:
                         sort order: +++++++++++++++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
+                            keyColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: []
                         Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
@@ -109,15 +111,27 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
         Reducer 2 
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: aaaaaaaaaaaaaaa
+                reduceColumnSortOrder: +++++++++++++++
                 groupByVectorOutput: true
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 15
+                    dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:smallint, KEY.reducesinkkey3:tinyint, KEY.reducesinkkey4:timestamp, KEY.reducesinkkey5:string, KEY.reducesinkkey6:bigint, KEY.reducesinkkey7:int, KEY.reducesinkkey8:smallint, KEY.reducesinkkey9:tinyint, KEY.reducesinkkey10:int, KEY.reducesinkkey11:bigint, KEY.reducesinkkey12:int, KEY.reducesinkkey13:tinyint, KEY.reducesinkkey14:tinyint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/spark/vectorization_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_8.q.out b/ql/src/test/results/clientpositive/spark/vectorization_8.q.out
index 927ee59..3eed209 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_8.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -23,7 +23,7 @@ WHERE  (((cstring2 IS NOT NULL)
 ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -92,8 +92,10 @@ STAGE PLANS:
                         sort order: ++++++++++++++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
+                            keyColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: []
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
@@ -105,15 +107,27 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, double, double, double, double, double, double, double, double, double, double
         Reducer 2 
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                reduceColumnNullOrder: aaaaaaaaaaaaaa
+                reduceColumnSortOrder: ++++++++++++++
                 groupByVectorOutput: true
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 14
+                    dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:double, KEY.reducesinkkey2:boolean, KEY.reducesinkkey3:string, KEY.reducesinkkey4:float, KEY.reducesinkkey5:double, KEY.reducesinkkey6:double, KEY.reducesinkkey7:double, KEY.reducesinkkey8:float, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:float, KEY.reducesinkkey13:double
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)


Mime
View raw message