hive-commits mailing list archives

From sunc...@apache.org
Subject hive git commit: HIVE-15507: Nested column pruning: fix issue when selecting struct field from array/map element (Chao Sun, reviewed by Ferdinand Xu)
Date Mon, 02 Jan 2017 21:03:12 GMT
Repository: hive
Updated Branches:
  refs/heads/master a241e55a4 -> 5d45974e9


HIVE-15507: Nested column pruning: fix issue when selecting struct field from array/map element (Chao Sun, reviewed by Ferdinand Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5d45974e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5d45974e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5d45974e

Branch: refs/heads/master
Commit: 5d45974e943b6a35f1428632283b6b9e20357485
Parents: a241e55
Author: Chao Sun <sunchao@apache.org>
Authored: Fri Dec 23 09:35:20 2016 -0800
Committer: Chao Sun <sunchao@apache.org>
Committed: Mon Jan 2 13:01:51 2017 -0800

----------------------------------------------------------------------
 .../ql/io/parquet/serde/ParquetHiveSerDe.java   |   2 +-
 .../hive/ql/optimizer/ColumnPrunerProcCtx.java  |  20 +-
 .../clientpositive/nested_column_pruning.q      |  29 +-
 .../clientpositive/nested_column_pruning.q.out  | 404 +++++++++++++------
 4 files changed, 329 insertions(+), 126 deletions(-)
----------------------------------------------------------------------
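
For context, the fix targets queries that select a struct field out of an array or map element. The new tests (see the nested_column_pruning.q diff below) add a map-of-struct column to the test schema, so nested_tbl_1 now carries, among others:

    s3 struct<f12: array<struct<f13:string, f14:int>>>,
    s4 map<string, struct<f15:int>>

and the queries under test access struct fields through subscripts, e.g. s3.f12[0].f14 and s4['key1'].f15.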


http://git-wip-us.apache.org/repos/asf/hive/blob/5d45974e/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
index a124938..5870a50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
@@ -217,7 +217,7 @@ public class ParquetHiveSerDe extends AbstractSerDe {
 
   private static void pruneFromSinglePath(PrunedStructTypeInfo prunedInfo, String path) {
     Preconditions.checkArgument(prunedInfo != null,
-      "PrunedStructTypeInfo for path " + path + " should not be null");
+      "PrunedStructTypeInfo for path '" + path + "' should not be null");
 
     int index = path.indexOf('.');
     if (index < 0) {

http://git-wip-us.apache.org/repos/asf/hive/blob/5d45974e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index e9af7a7..2a0c469 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -39,8 +39,10 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
 
 import static org.apache.hadoop.hive.ql.optimizer.FieldNode.mergeFieldNodes;
 
@@ -242,10 +244,24 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
       p.addFieldNodes(pathToRoot);
       paths.add(p);
     } else if (desc instanceof ExprNodeFieldDesc) {
-      String f = ((ExprNodeFieldDesc) desc).getFieldName();
+      ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) desc;
+      ExprNodeDesc childDesc = fieldDesc.getDesc();
+
+      // Check cases for arr[i].f and map[key].v
+      // For these we should not generate paths like arr.f or map.v
+      // Otherwise we would have a mismatch between type info and path
+      if (childDesc instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) childDesc;
+        if (funcDesc.getGenericUDF() instanceof GenericUDFIndex) {
+          getNestedColsFromExprNodeDesc(funcDesc, pathToRoot, paths);
+          return;
+        }
+      }
+
+      String f = fieldDesc.getFieldName();
       FieldNode p = new FieldNode(f);
       p.addFieldNodes(pathToRoot);
-      getNestedColsFromExprNodeDesc(((ExprNodeFieldDesc) desc).getDesc(), p, paths);
+      getNestedColsFromExprNodeDesc(childDesc, p, paths);
     } else {
       List<ExprNodeDesc> children = desc.getChildren();
       if (children == null || children.isEmpty()) {
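
The ColumnPrunerProcCtx change above skips the field name whenever an ExprNodeFieldDesc sits on top of an index expression (GenericUDFIndex) and recurses into the indexed child instead, so the recorded nested path never crosses an array/map boundary. Per the updated nested_column_pruning.q.out, pruning now resolves roughly as follows (illustrative, using the test schema):

    -- struct field reached through an array element
    SELECT count(s1.f6), s3.f12[0].f14 FROM nested_tbl_1 GROUP BY s3.f12[0].f14;
    -- Pruned Column Paths: s3.f12, s1.f6   (not s3.f12.f14)

    -- struct field reached through a map value
    SELECT count(s1.f6), s4['key1'].f15 FROM nested_tbl_1 GROUP BY s4['key1'].f15;
    -- Pruned Column Paths: s1.f6           (s4 is read as a whole)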

http://git-wip-us.apache.org/repos/asf/hive/blob/5d45974e/ql/src/test/queries/clientpositive/nested_column_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index b08b356..35de3ed 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -11,13 +11,15 @@ CREATE TABLE nested_tbl_1 (
   a int,
   s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
   s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>,
-  s3 struct<f12: array<struct<f13:string, f14:int>>>
+  s3 struct<f12: array<struct<f13:string, f14:int>>>,
+  s4 map<string, struct<f15:int>>
 ) STORED AS PARQUET;
 
 INSERT INTO TABLE nested_tbl_1 SELECT
   1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
   named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
-  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28)))
+  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
+  map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2))
 FROM dummy;
 
 DROP TABLE IF EXISTS nested_tbl_2;
@@ -26,7 +28,8 @@ CREATE TABLE nested_tbl_2 LIKE nested_tbl_1;
 INSERT INTO TABLE nested_tbl_2 SELECT
   2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
   named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
-  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56)))
+  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
+  map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4))
 FROM dummy;
 
 -- Testing only select statements
@@ -122,3 +125,23 @@ SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
 FROM nested_tbl_1;
 
 SELECT * FROM nested_tbl_3;
+
+-- Testing select struct field from elements in array or map
+
+EXPLAIN
+SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14;
+
+SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14;
+
+EXPLAIN
+SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15;
+
+SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15;

http://git-wip-us.apache.org/repos/asf/hive/blob/5d45974e/ql/src/test/results/clientpositive/nested_column_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
index 8d32df5..da2908c 100644
--- a/ql/src/test/results/clientpositive/nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
@@ -27,7 +27,8 @@ PREHOOK: query: CREATE TABLE nested_tbl_1 (
   a int,
   s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
   s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>,
-  s3 struct<f12: array<struct<f13:string, f14:int>>>
+  s3 struct<f12: array<struct<f13:string, f14:int>>>,
+  s4 map<string, struct<f15:int>>
 ) STORED AS PARQUET
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -36,7 +37,8 @@ POSTHOOK: query: CREATE TABLE nested_tbl_1 (
   a int,
   s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
   s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>,
-  s3 struct<f12: array<struct<f13:string, f14:int>>>
+  s3 struct<f12: array<struct<f13:string, f14:int>>>,
+  s4 map<string, struct<f15:int>>
 ) STORED AS PARQUET
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
@@ -44,7 +46,8 @@ POSTHOOK: Output: default@nested_tbl_1
 PREHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT
   1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
   named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
-  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28)))
+  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
+  map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2))
 FROM dummy
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dummy
@@ -52,7 +55,8 @@ PREHOOK: Output: default@nested_tbl_1
 POSTHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT
   1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
   named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
-  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28)))
+  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
+  map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2))
 FROM dummy
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dummy
@@ -61,6 +65,7 @@ POSTHOOK: Lineage: nested_tbl_1.a SIMPLE []
 POSTHOOK: Lineage: nested_tbl_1.s1 EXPRESSION []
 POSTHOOK: Lineage: nested_tbl_1.s2 EXPRESSION []
 POSTHOOK: Lineage: nested_tbl_1.s3 EXPRESSION []
+POSTHOOK: Lineage: nested_tbl_1.s4 EXPRESSION []
 PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_2
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_2
@@ -76,7 +81,8 @@ POSTHOOK: Output: default@nested_tbl_2
 PREHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT
   2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
   named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
-  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56)))
+  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
+  map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4))
 FROM dummy
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dummy
@@ -84,7 +90,8 @@ PREHOOK: Output: default@nested_tbl_2
 POSTHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT
   2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
   named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
-  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56)))
+  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
+  map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4))
 FROM dummy
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dummy
@@ -93,6 +100,7 @@ POSTHOOK: Lineage: nested_tbl_2.a SIMPLE []
 POSTHOOK: Lineage: nested_tbl_2.s1 EXPRESSION []
 POSTHOOK: Lineage: nested_tbl_2.s2 EXPRESSION []
 POSTHOOK: Lineage: nested_tbl_2.s3 EXPRESSION []
+POSTHOOK: Lineage: nested_tbl_2.s4 EXPRESSION []
 PREHOOK: query: -- Testing only select statements
 
 EXPLAIN SELECT a FROM nested_tbl_1
@@ -111,14 +119,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: nested_tbl_1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: a (type: int)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -154,14 +162,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f1 (type: boolean)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -197,14 +205,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f1, s1.f2
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f1 (type: boolean), s1.f2 (type: string)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -242,14 +250,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -287,14 +295,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3.f5
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3.f5 (type: double)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -332,14 +340,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3.f4, s2.f8.f9
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3.f4 (type: int), s2.f8.f9 (type: boolean)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -379,17 +387,17 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f2, s1.f1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (s1.f1 = false) (type: boolean)
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s1.f2 (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -425,17 +433,17 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3.f5, s1.f3.f4
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (s1.f3.f4 = 4) (type: boolean)
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s1.f3.f5 (type: double)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -471,17 +479,17 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f2, s2.f8
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean)
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -525,32 +533,32 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s2.f8.f10
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Lateral View Forward
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s3 (type: struct<f12:array<struct<f13:string,f14:int>>>)
                 outputColumnNames: s3
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Lateral View Join Operator
-                  outputColumnNames: _col3, _col7
-                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col3, _col8
+                  Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   Lateral View Forward
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col7 (type: int)
-                      outputColumnNames: _col7
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      expressions: _col8 (type: int)
+                      outputColumnNames: _col8
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                       Lateral View Join Operator
-                        outputColumnNames: _col7, _col8
-                        Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                        outputColumnNames: _col8, _col9
+                        Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
-                          expressions: _col7 (type: int), _col8 (type: struct<f13:string,f14:int>)
+                          expressions: _col8 (type: int), _col9 (type: struct<f13:string,f14:int>)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                           File Output Operator
                             compressed: false
-                            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                             table:
                                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -558,20 +566,20 @@ STAGE PLANS:
                     Select Operator
                       expressions: _col3.f12 (type: array<struct<f13:string,f14:int>>)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                       UDTF Operator
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                         function name: explode
                         Lateral View Join Operator
-                          outputColumnNames: _col7, _col8
-                          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                          outputColumnNames: _col8, _col9
+                          Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                           Select Operator
-                            expressions: _col7 (type: int), _col8 (type: struct<f13:string,f14:int>)
+                            expressions: _col8 (type: int), _col9 (type: struct<f13:string,f14:int>)
                             outputColumnNames: _col0, _col1
-                            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                             File Output Operator
                               compressed: false
-                              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                               table:
                                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -579,29 +587,29 @@ STAGE PLANS:
               Select Operator
                 expressions: s2.f8.f10 (type: array<int>)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 UDTF Operator
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   function name: explode
                   Lateral View Join Operator
-                    outputColumnNames: _col3, _col7
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    outputColumnNames: _col3, _col8
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     Lateral View Forward
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
-                        expressions: _col7 (type: int)
-                        outputColumnNames: _col7
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        expressions: _col8 (type: int)
+                        outputColumnNames: _col8
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                         Lateral View Join Operator
-                          outputColumnNames: _col7, _col8
-                          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                          outputColumnNames: _col8, _col9
+                          Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                           Select Operator
-                            expressions: _col7 (type: int), _col8 (type: struct<f13:string,f14:int>)
+                            expressions: _col8 (type: int), _col9 (type: struct<f13:string,f14:int>)
                             outputColumnNames: _col0, _col1
-                            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                             File Output Operator
                               compressed: false
-                              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                               table:
                                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -609,20 +617,20 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col3.f12 (type: array<struct<f13:string,f14:int>>)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                         UDTF Operator
-                          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                           function name: explode
                           Lateral View Join Operator
-                            outputColumnNames: _col7, _col8
-                            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                            outputColumnNames: _col8, _col9
+                            Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                             Select Operator
-                              expressions: _col7 (type: int), _col8 (type: struct<f13:string,f14:int>)
+                              expressions: _col8 (type: int), _col9 (type: struct<f13:string,f14:int>)
                               outputColumnNames: _col0, _col1
-                              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                               File Output Operator
                                 compressed: false
-                                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                                Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                                 table:
                                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -667,14 +675,14 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s2.f8.f10, s1.f3.f4
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: (s2.f8.f10[1] pmod s1.f3.f4) (type: int)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -714,22 +722,22 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3.f5, s1.f3.f4
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3.f5 (type: double), s1.f3.f4 (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(_col1)
                 keys: _col0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: double)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: double)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -737,10 +745,10 @@ STAGE PLANS:
           keys: KEY._col0 (type: double)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -776,22 +784,22 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(_col1)
                 keys: _col0 (type: struct<f4:int,f5:double>)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: struct<f4:int,f5:double>)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -799,10 +807,10 @@ STAGE PLANS:
           keys: KEY._col0 (type: struct<f4:int,f5:double>)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -839,22 +847,22 @@ STAGE PLANS:
           TableScan
             alias: nested_tbl_1
             Pruned Column Paths: s1.f3
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(_col1)
                 keys: _col0 (type: struct<f4:int,f5:double>)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: struct<f4:int,f5:double>)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -862,7 +870,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: struct<f4:int,f5:double>)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -877,16 +885,16 @@ STAGE PLANS:
             Reduce Output Operator
               key expressions: _col0 (type: struct<f4:int,f5:double>)
               sort order: +
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: struct<f4:int,f5:double>), VALUE._col0 (type: bigint)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -931,32 +939,32 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: t1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0.f3.f4 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0.f3.f4 (type: int)
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>)
           TableScan
             alias: t2
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (s2.f8.f9 = false) (type: boolean)
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s2 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0.f6 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0.f6 (type: int)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>)
       Reduce Operator Tree:
         Join Operator
@@ -966,14 +974,14 @@ STAGE PLANS:
             0 _col0.f3.f4 (type: int)
             1 _col0.f6 (type: int)
           outputColumnNames: _col0, _col2
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1022,32 +1030,32 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: t1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>)
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0.f3.f4 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0.f3.f4 (type: int)
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>)
           TableScan
             alias: t2
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (s2.f8.f9 = true) (type: boolean)
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s2 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0.f6 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0.f6 (type: int)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>)
       Reduce Operator Tree:
         Join Operator
@@ -1057,14 +1065,14 @@ STAGE PLANS:
             0 _col0.f3.f4 (type: int)
             1 _col0.f6 (type: int)
           outputColumnNames: _col0, _col2
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1132,3 +1140,159 @@ POSTHOOK: Input: default@nested_tbl_3
 POSTHOOK: Input: default@nested_tbl_3@f3=4
 #### A masked pattern was here ####
 false	foo	4
+PREHOOK: query: -- Testing select struct field from elements in array or map
+
+EXPLAIN
+SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Testing select struct field from elements in array or map
+
+EXPLAIN
+SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: nested_tbl_1
+            Pruned Column Paths: s3.f12, s1.f6
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: s3.f12[0].f14 (type: int), s1.f6 (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(_col1)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col0 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(s1.f6), s3.f12[0].f14
+FROM nested_tbl_1
+GROUP BY s3.f12[0].f14
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+1	14
+PREHOOK: query: EXPLAIN
+SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: nested_tbl_1
+            Pruned Column Paths: s1.f6
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: s4['key1'].f15 (type: int), s1.f6 (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(_col1)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col0 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+1	1

