hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject [23/28] hive git commit: HIVE-15029: Add logic to estimate stats for BETWEEN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Tue, 25 Oct 2016 21:43:42 GMT
HIVE-15029: Add logic to estimate stats for BETWEEN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2653db3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2653db3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2653db3

Branch: refs/heads/hive-14535
Commit: e2653db377ab7fff34563d348364fd0c92f359c6
Parents: 749e831
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Fri Oct 21 12:30:06 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Tue Oct 25 07:13:09 2016 -0400

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java |  34 +++++-
 .../clientpositive/llap/explainuser_4.q.out     |  78 ++++++------
 .../llap/orc_predicate_pushdown.q.out           |  20 +--
 .../llap/parquet_predicate_pushdown.q.out       |  44 +++----
 .../llap/tez_dynpart_hashjoin_1.q.out           | 120 +++++++++---------
 .../llap/tez_vector_dynpart_hashjoin_1.q.out    | 122 +++++++++----------
 .../llap/vector_between_columns.q.out           |   6 +-
 .../clientpositive/llap/vector_between_in.q.out |  34 +++---
 .../results/clientpositive/perf/query12.q.out   |   2 +-
 .../results/clientpositive/perf/query13.q.out   |  24 ++--
 .../results/clientpositive/perf/query20.q.out   |   4 +-
 .../results/clientpositive/perf/query21.q.out   |   8 +-
 .../results/clientpositive/perf/query22.q.out   |   4 +-
 .../results/clientpositive/perf/query25.q.out   |   8 +-
 .../results/clientpositive/perf/query28.q.out   |  36 +++---
 .../results/clientpositive/perf/query29.q.out   |   4 +-
 .../results/clientpositive/perf/query32.q.out   |   8 +-
 .../results/clientpositive/perf/query34.q.out   |  12 +-
 .../results/clientpositive/perf/query40.q.out   |   8 +-
 .../results/clientpositive/perf/query48.q.out   |  18 +--
 .../results/clientpositive/perf/query51.q.out   |   8 +-
 .../results/clientpositive/perf/query54.q.out   |   4 +-
 .../results/clientpositive/perf/query58.q.out   |  12 +-
 .../results/clientpositive/perf/query64.q.out   |   8 +-
 .../results/clientpositive/perf/query65.q.out   |   8 +-
 .../results/clientpositive/perf/query66.q.out   |   8 +-
 .../results/clientpositive/perf/query67.q.out   |   4 +-
 .../results/clientpositive/perf/query68.q.out   |   4 +-
 .../results/clientpositive/perf/query70.q.out   |   8 +-
 .../results/clientpositive/perf/query73.q.out   |  12 +-
 .../results/clientpositive/perf/query79.q.out   |   4 +-
 .../results/clientpositive/perf/query80.q.out   |  12 +-
 .../results/clientpositive/perf/query82.q.out   |  14 +--
 .../results/clientpositive/perf/query85.q.out   |  42 +++----
 .../results/clientpositive/perf/query87.q.out   |  12 +-
 .../results/clientpositive/perf/query90.q.out   |  16 +--
 .../results/clientpositive/perf/query94.q.out   |   4 +-
 .../results/clientpositive/perf/query95.q.out   |   4 +-
 .../results/clientpositive/perf/query97.q.out   |   8 +-
 .../results/clientpositive/perf/query98.q.out   |   4 +-
 .../spark/vector_between_in.q.out               |  34 +++---
 .../clientpositive/tez/explainanalyze_4.q.out   |  80 ++++++------
 .../results/clientpositive/udf_between.q.out    |   8 +-
 .../clientpositive/vector_between_columns.q.out |   6 +-
 44 files changed, 475 insertions(+), 443 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index ab07fb6..aa1e509 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -71,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -89,6 +90,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -354,6 +356,9 @@ public class StatsRulesProcFactory {
         } else if (udf instanceof GenericUDFIn) {
           // for IN clause
           newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
+        } else if (udf instanceof GenericUDFBetween) {
+          // for BETWEEN clause
+          newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop);
         } else if (udf instanceof GenericUDFOPNot) {
           newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
         } else if (udf instanceof GenericUDFOPNotNull) {
@@ -480,6 +485,32 @@ public class StatsRulesProcFactory {
       return Math.round( (double)numRows * factor * inFactor);
     }
 
+    private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+            List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException {
+      final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
+      final boolean invert = Boolean.TRUE.equals(
+          ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not)
+      final ExprNodeDesc comparisonExpression = fd.getChildren().get(1); // expression
+      final ExprNodeDesc leftExpression = fd.getChildren().get(2); // left expression
+      final ExprNodeDesc rightExpression = fd.getChildren().get(3); // right expression
+
+      // We transform the BETWEEN clause to AND clause (with NOT on top if invert is true).
+      // This is more straightforward, as the evaluateExpression method will deal with
+      // generating the final row count relying on the basic comparator evaluation methods
+      final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+          new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression));
+      final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+          new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression));
+      ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+          new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator));
+      if (invert) {
+        newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+          new GenericUDFOPNot(), Lists.newArrayList(newExpression));
+      }
+
+      return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0);
+    }
+
     private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
         throws CloneNotSupportedException, SemanticException {
@@ -866,7 +897,8 @@ public class StatsRulesProcFactory {
         } else if (udf instanceof GenericUDFOPNull) {
           return evaluateColEqualsNullExpr(stats, genFunc);
         } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
-            || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) {
+            || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween
+            || udf instanceof GenericUDFOPNot) {
           return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
         }
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
index 0978ddd..e83d6d8 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
@@ -28,27 +28,27 @@ Stage-0
     Stage-1
       Reducer 3 llap
       File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=9759 width=620)
+        Select Operator [SEL_11] (rows=2166 width=620)
           Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
         <-Reducer 2 [SIMPLE_EDGE] llap
           SHUFFLE [RS_10]
-            Merge Join Operator [MERGEJOIN_17] (rows=9759 width=620)
+            Merge Join Operator [MERGEJOIN_17] (rows=2166 width=620)
               Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
             <-Map 1 [SIMPLE_EDGE] llap
               SHUFFLE [RS_6]
                 PartitionCols:_col2
-                Select Operator [SEL_2] (rows=6144 width=251)
+                Select Operator [SEL_2] (rows=1365 width=251)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
-                  Filter Operator [FIL_15] (rows=6144 width=251)
+                  Filter Operator [FIL_15] (rows=1365 width=251)
                     predicate:cint BETWEEN 1000000 AND 3000000
                     TableScan [TS_0] (rows=12288 width=251)
                       default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
             <-Map 4 [SIMPLE_EDGE] llap
               SHUFFLE [RS_7]
                 PartitionCols:_col2
-                Select Operator [SEL_5] (rows=4586 width=251)
+                Select Operator [SEL_5] (rows=1019 width=251)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
-                  Filter Operator [FIL_16] (rows=4586 width=251)
+                  Filter Operator [FIL_16] (rows=1019 width=251)
                     predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                     TableScan [TS_3] (rows=12288 width=251)
                       default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
@@ -113,23 +113,23 @@ Stage-0
           SHUFFLE [RS_11]
             Group By Operator [GBY_10] (rows=1 width=8)
               Output:["_col0"],aggregations:["count()"]
-              Merge Join Operator [MERGEJOIN_19] (rows=9759 width=8)
+              Merge Join Operator [MERGEJOIN_19] (rows=2166 width=8)
                 Conds:RS_6._col0=RS_7._col0(Inner)
               <-Map 1 [SIMPLE_EDGE] llap
                 SHUFFLE [RS_6]
                   PartitionCols:_col0
-                  Select Operator [SEL_2] (rows=6144 width=2)
+                  Select Operator [SEL_2] (rows=1365 width=2)
                     Output:["_col0"]
-                    Filter Operator [FIL_17] (rows=6144 width=2)
+                    Filter Operator [FIL_17] (rows=1365 width=2)
                       predicate:cint BETWEEN 1000000 AND 3000000
                       TableScan [TS_0] (rows=12288 width=2)
                         default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["cint"]
               <-Map 4 [SIMPLE_EDGE] llap
                 SHUFFLE [RS_7]
                   PartitionCols:_col0
-                  Select Operator [SEL_5] (rows=4586 width=8)
+                  Select Operator [SEL_5] (rows=1019 width=8)
                     Output:["_col0"]
-                    Filter Operator [FIL_18] (rows=4586 width=8)
+                    Filter Operator [FIL_18] (rows=1019 width=8)
                       predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                       TableScan [TS_3] (rows=12288 width=8)
                         default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"]
@@ -182,34 +182,34 @@ Stage-0
     Stage-1
       Reducer 4 llap
       File Output Operator [FS_16]
-        Select Operator [SEL_15] (rows=2765 width=12)
+        Select Operator [SEL_15] (rows=615 width=12)
           Output:["_col0","_col1"]
         <-Reducer 3 [SIMPLE_EDGE] llap
           SHUFFLE [RS_14]
-            Group By Operator [GBY_12] (rows=2765 width=12)
+            Group By Operator [GBY_12] (rows=615 width=12)
               Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
             <-Reducer 2 [SIMPLE_EDGE] llap
               SHUFFLE [RS_11]
                 PartitionCols:_col0
-                Group By Operator [GBY_10] (rows=2765 width=12)
+                Group By Operator [GBY_10] (rows=615 width=12)
                   Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                  Merge Join Operator [MERGEJOIN_21] (rows=9759 width=4)
+                  Merge Join Operator [MERGEJOIN_21] (rows=2166 width=4)
                     Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"]
                   <-Map 1 [SIMPLE_EDGE] llap
                     SHUFFLE [RS_6]
                       PartitionCols:_col1
-                      Select Operator [SEL_2] (rows=6144 width=5)
+                      Select Operator [SEL_2] (rows=1365 width=5)
                         Output:["_col0","_col1"]
-                        Filter Operator [FIL_19] (rows=6144 width=5)
+                        Filter Operator [FIL_19] (rows=1365 width=5)
                           predicate:cint BETWEEN 1000000 AND 3000000
                           TableScan [TS_0] (rows=12288 width=5)
                             default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["csmallint","cint"]
                   <-Map 5 [SIMPLE_EDGE] llap
                     SHUFFLE [RS_7]
                       PartitionCols:_col0
-                      Select Operator [SEL_5] (rows=4586 width=8)
+                      Select Operator [SEL_5] (rows=1019 width=8)
                         Output:["_col0"]
-                        Filter Operator [FIL_20] (rows=4586 width=8)
+                        Filter Operator [FIL_20] (rows=1019 width=8)
                           predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                           TableScan [TS_3] (rows=12288 width=8)
                             default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"]
@@ -269,27 +269,27 @@ Stage-0
     Stage-1
       Reducer 3 llap
       File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=6758 width=215)
+        Select Operator [SEL_11] (rows=1501 width=215)
           Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
         <-Reducer 2 [SIMPLE_EDGE] llap
           SHUFFLE [RS_10]
-            Map Join Operator [MAPJOIN_17] (rows=6758 width=215)
+            Map Join Operator [MAPJOIN_17] (rows=1501 width=215)
               Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
             <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
               PARTITION_ONLY_SHUFFLE [RS_7]
                 PartitionCols:_col2
-                Select Operator [SEL_5] (rows=6144 width=215)
+                Select Operator [SEL_5] (rows=1365 width=215)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
-                  Filter Operator [FIL_16] (rows=6144 width=215)
+                  Filter Operator [FIL_16] (rows=1365 width=215)
                     predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                     TableScan [TS_3] (rows=12288 width=215)
                       default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
             <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
               PARTITION_ONLY_SHUFFLE [RS_6]
                 PartitionCols:_col2
-                Select Operator [SEL_2] (rows=6144 width=215)
+                Select Operator [SEL_2] (rows=1365 width=215)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
-                  Filter Operator [FIL_15] (rows=6144 width=215)
+                  Filter Operator [FIL_15] (rows=1365 width=215)
                     predicate:cint BETWEEN 1000000 AND 3000000
                     TableScan [TS_0] (rows=12288 width=215)
                       default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
@@ -354,23 +354,23 @@ Stage-0
           SHUFFLE [RS_11]
             Group By Operator [GBY_10] (rows=1 width=8)
               Output:["_col0"],aggregations:["count()"]
-              Map Join Operator [MAPJOIN_19] (rows=6758 width=215)
+              Map Join Operator [MAPJOIN_19] (rows=1501 width=215)
                 Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true
               <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
                 PARTITION_ONLY_SHUFFLE [RS_7]
                   PartitionCols:_col0
-                  Select Operator [SEL_5] (rows=6144 width=215)
+                  Select Operator [SEL_5] (rows=1365 width=215)
                     Output:["_col0"]
-                    Filter Operator [FIL_18] (rows=6144 width=215)
+                    Filter Operator [FIL_18] (rows=1365 width=215)
                       predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                       TableScan [TS_3] (rows=12288 width=215)
                         default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"]
               <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
                 PARTITION_ONLY_SHUFFLE [RS_6]
                   PartitionCols:_col0
-                  Select Operator [SEL_2] (rows=6144 width=215)
+                  Select Operator [SEL_2] (rows=1365 width=215)
                     Output:["_col0"]
-                    Filter Operator [FIL_17] (rows=6144 width=215)
+                    Filter Operator [FIL_17] (rows=1365 width=215)
                       predicate:cint BETWEEN 1000000 AND 3000000
                       TableScan [TS_0] (rows=12288 width=215)
                         default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["cint"]
@@ -423,34 +423,34 @@ Stage-0
     Stage-1
       Reducer 4 llap
       File Output Operator [FS_16]
-        Select Operator [SEL_15] (rows=3379 width=215)
+        Select Operator [SEL_15] (rows=750 width=215)
           Output:["_col0","_col1"]
         <-Reducer 3 [SIMPLE_EDGE] llap
           SHUFFLE [RS_14]
-            Group By Operator [GBY_12] (rows=3379 width=215)
+            Group By Operator [GBY_12] (rows=750 width=215)
               Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
             <-Reducer 2 [SIMPLE_EDGE] llap
               SHUFFLE [RS_11]
                 PartitionCols:_col0
-                Group By Operator [GBY_10] (rows=6758 width=215)
+                Group By Operator [GBY_10] (rows=1501 width=215)
                   Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                  Map Join Operator [MAPJOIN_21] (rows=6758 width=215)
+                  Map Join Operator [MAPJOIN_21] (rows=1501 width=215)
                     Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"]
                   <-Map 5 [CUSTOM_SIMPLE_EDGE] llap
                     PARTITION_ONLY_SHUFFLE [RS_7]
                       PartitionCols:_col0
-                      Select Operator [SEL_5] (rows=6144 width=215)
+                      Select Operator [SEL_5] (rows=1365 width=215)
                         Output:["_col0"]
-                        Filter Operator [FIL_20] (rows=6144 width=215)
+                        Filter Operator [FIL_20] (rows=1365 width=215)
                           predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
                           TableScan [TS_3] (rows=12288 width=215)
                             default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"]
                   <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
                     PARTITION_ONLY_SHUFFLE [RS_6]
                       PartitionCols:_col1
-                      Select Operator [SEL_2] (rows=6144 width=215)
+                      Select Operator [SEL_2] (rows=1365 width=215)
                         Output:["_col0","_col1"]
-                        Filter Operator [FIL_19] (rows=6144 width=215)
+                        Filter Operator [FIL_19] (rows=1365 width=215)
                           predicate:cint BETWEEN 1000000 AND 3000000
                           TableScan [TS_0] (rows=12288 width=215)
                             default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["csmallint","cint"]
@@ -475,8 +475,8 @@ order by c1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
+-13036	1
 -8915	1
 -3799	1
 10782	1
--13036	1
 NULL	6

http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
index db0baee..48a86cf 100644
--- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
@@ -608,15 +608,15 @@ STAGE PLANS:
                   Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
-                    Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -625,10 +625,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -674,15 +674,15 @@ STAGE PLANS:
                   Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
-                    Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -691,10 +691,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
index 6541772..3254fb4 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
@@ -544,15 +544,15 @@ STAGE PLANS:
                   Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
-                    Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -561,10 +561,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -610,15 +610,15 @@ STAGE PLANS:
                   Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
-                    Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -627,10 +627,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -741,15 +741,15 @@ STAGE PLANS:
                   Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean)
-                    Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col3 (type: string)
                         sort order: -
-                        Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
             Execution mode: llap
@@ -760,13 +760,13 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 3
-                  Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -820,15 +820,15 @@ STAGE PLANS:
                   Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean)
-                    Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col3 (type: string)
                         sort order: -
-                        Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
             Execution mode: llap
@@ -839,13 +839,13 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 3
-                  Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
index 5c8db64..25c6f15 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
@@ -36,16 +36,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col2 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: llap
             LLAP IO: all inputs
@@ -56,16 +56,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col2 (type: int)
-                        Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: llap
             LLAP IO: all inputs
@@ -79,11 +79,11 @@ STAGE PLANS:
                   0 _col2 (type: int)
                   1 _col2 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col2 (type: int)
                   sort order: +
-                  Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean)
         Reducer 3 
             Execution mode: llap
@@ -91,10 +91,10 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -168,16 +168,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
         Map 4 
@@ -187,16 +187,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -208,7 +208,7 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Statistics: Num rows: 9759 Data size: 78072 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2166 Data size: 17328 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -296,16 +296,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 73396 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: csmallint (type: smallint), cint (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: int)
-                        Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint)
             Execution mode: llap
             LLAP IO: all inputs
@@ -316,16 +316,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -338,18 +338,18 @@ STAGE PLANS:
                   0 _col1 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 9759 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2166 Data size: 8664 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   keys: _col0 (type: smallint)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: smallint)
-                    Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -359,11 +359,11 @@ STAGE PLANS:
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col1 (type: bigint)
                   sort order: +
-                  Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: smallint)
         Reducer 4 
             Execution mode: llap
@@ -371,10 +371,10 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -449,16 +449,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col2 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: llap
             LLAP IO: all inputs
@@ -469,16 +469,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col2 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: llap
             LLAP IO: all inputs
@@ -494,12 +494,12 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
                 input vertices:
                   1 Map 4
-                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                 HybridGraceHashJoin: true
                 Reduce Output Operator
                   key expressions: _col2 (type: int)
                   sort order: +
-                  Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean)
         Reducer 3 
             Execution mode: llap
@@ -507,10 +507,10 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -584,16 +584,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: all inputs
         Map 4 
@@ -603,16 +603,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -626,7 +626,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: int)
                 input vertices:
                   1 Map 4
-                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                 HybridGraceHashJoin: true
                 Group By Operator
                   aggregations: count()
@@ -715,16 +715,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint), cint (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: smallint)
             Execution mode: llap
             LLAP IO: all inputs
@@ -735,16 +735,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -759,19 +759,19 @@ STAGE PLANS:
                 outputColumnNames: _col0
                 input vertices:
                   1 Map 5
-                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                 HybridGraceHashJoin: true
                 Group By Operator
                   aggregations: count()
                   keys: _col0 (type: smallint)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: smallint)
-                    Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -781,11 +781,11 @@ STAGE PLANS:
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: smallint)
                   sort order: +
-                  Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
         Reducer 4 
             Execution mode: llap
@@ -793,10 +793,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat


Mime
View raw message