Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 66C53200BD2 for ; Sat, 3 Dec 2016 20:43:13 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 6559C160B28; Sat, 3 Dec 2016 19:43:13 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 9043C160B0F for ; Sat, 3 Dec 2016 20:43:11 +0100 (CET) Received: (qmail 65914 invoked by uid 500); 3 Dec 2016 19:43:10 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 65892 invoked by uid 99); 3 Dec 2016 19:43:10 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 03 Dec 2016 19:43:10 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D0FD3EC22D; Sat, 3 Dec 2016 19:43:09 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sunchao@apache.org To: commits@hive.apache.org Date: Sat, 03 Dec 2016 19:43:09 -0000 Message-Id: <448c8e2d00714fcc9076b37cce932817@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] hive git commit: HIVE-15057: Nested column pruning: support all operators (Chao Sun, reviewed by Ferdinand Xu) archived-at: Sat, 03 Dec 2016 19:43:13 -0000 Repository: hive Updated Branches: refs/heads/master 2feaa5dc9 -> a625bb039 http://git-wip-us.apache.org/repos/asf/hive/blob/a625bb03/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestColumnPrunerProcCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestColumnPrunerProcCtx.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestColumnPrunerProcCtx.java index dfcd154..2cfa747 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestColumnPrunerProcCtx.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestColumnPrunerProcCtx.java @@ -38,6 +38,7 @@ import java.util.Arrays; import java.util.List; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -80,11 +81,11 @@ public class TestColumnPrunerProcCtx { ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.booleanTypeInfo, col1, "a", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root.col1.a" }, groups.toArray(new String[groups.size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root.col1.a"); } // Test select root.col1 from root:struct,col2:double> @@ -94,11 +95,11 @@ public class TestColumnPrunerProcCtx { ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root.col1" }, groups.toArray(new String[groups.size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root.col1"); } // Test select root.col2 from root:struct,col2:double> @@ -108,11 +109,11 @@ public class TestColumnPrunerProcCtx { ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(col1Type, colDesc, "col2", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root.col2" }, groups.toArray(new String[groups.size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root.col2"); } // Test select root from root:struct,col2:double> @@ -121,11 +122,11 @@ public class TestColumnPrunerProcCtx { ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(colDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root" }, groups.toArray(new String[groups.size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root"); } // Test select named_struct from named_struct:struct @@ -143,9 +144,9 @@ public class TestColumnPrunerProcCtx { ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); // Return empty result since only constant Desc exists assertEquals(0, groups.size()); } @@ -160,7 +161,7 @@ public class TestColumnPrunerProcCtx { ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, col1, "b", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); GenericUDF udf = mock(GenericUDFBridge.class); @@ -170,8 +171,8 @@ public class TestColumnPrunerProcCtx { list); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root.col1.b" }, groups.toArray(new String[groups.size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root.col1.b"); } // Test select pow(root.col1.b, root.col2) from table test(root @@ -187,7 +188,7 @@ public class TestColumnPrunerProcCtx { colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col2 = new ExprNodeFieldDesc(col2Type, colDesc, "col2", false); - final List paths = Arrays.asList("_col0"); + final List paths = Arrays.asList(new FieldNode("_col0")); GenericUDF udf = mock(GenericUDFPower.class); @@ -198,16 +199,60 @@ public class TestColumnPrunerProcCtx { list); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths); - List groups = ctx.getSelectNestedColPathsFromChildren(selectOperator, paths); - assertEquals(new String[] { "root.col1.b", "root.col2" }, groups.toArray(new String[groups - .size()])); + List groups = ctx.getSelectColsFromChildren(selectOperator, paths); + compareTestResults(groups, "root.col1.b", "root.col2"); + } + + @Test + public void testFieldNodeFromString() { + FieldNode fn = FieldNode.fromPath("s.a.b"); + assertEquals("s", fn.getFieldName()); + assertEquals(1, fn.getNodes().size()); + FieldNode childFn = fn.getNodes().get(0); + assertEquals("a", childFn.getFieldName()); + assertEquals(1, childFn.getNodes().size()); + assertEquals("b", childFn.getNodes().get(0).getFieldName()); + } + + @Test + public void testMergeFieldNode() { + FieldNode fn1 = FieldNode.fromPath("s.a.b"); + FieldNode fn2 = FieldNode.fromPath("s.a"); + assertEquals(fn2, FieldNode.mergeFieldNode(fn1, fn2)); + assertEquals(fn2, FieldNode.mergeFieldNode(fn2, fn1)); + + fn1 = FieldNode.fromPath("s.a"); + fn2 = FieldNode.fromPath("p.b"); + assertNull(FieldNode.mergeFieldNode(fn1, fn2)); + + fn1 = FieldNode.fromPath("s.a.b"); + fn2 = FieldNode.fromPath("s.a.c"); + FieldNode fn = FieldNode.mergeFieldNode(fn1, fn2); + assertEquals("s", fn.getFieldName()); + FieldNode childFn = fn.getNodes().get(0); + assertEquals("a", childFn.getFieldName()); + assertEquals(2, childFn.getNodes().size()); + assertEquals("b", childFn.getNodes().get(0).getFieldName()); + assertEquals("c", childFn.getNodes().get(1).getFieldName()); + } + + private void compareTestResults(List fieldNodes, String... paths) { + List expectedPaths = new ArrayList<>(); + for (FieldNode fn : fieldNodes) { + expectedPaths.addAll(fn.toPaths()); + } + assertEquals("Expected paths to have length " + expectedPaths + ", but got " + + paths.length, expectedPaths.size(), paths.length); + for (int i = 0; i < expectedPaths.size(); ++i) { + assertEquals("Element at index " + i + " doesn't match", expectedPaths.get(i), paths[i]); + } } private SelectOperator buildSelectOperator( List colList, - List outputColumnNames) { + List outputCols) { SelectOperator selectOperator = mock(SelectOperator.class); - SelectDesc selectDesc = new SelectDesc(colList, outputColumnNames); + SelectDesc selectDesc = new SelectDesc(colList, ColumnPrunerProcCtx.toColumnNames(outputCols)); selectDesc.setSelStarNoCompute(false); when(selectOperator.getConf()).thenReturn(selectDesc); return selectOperator; http://git-wip-us.apache.org/repos/asf/hive/blob/a625bb03/ql/src/test/queries/clientpositive/nested_column_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q new file mode 100644 index 0000000..28b974e --- /dev/null +++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q @@ -0,0 +1,112 @@ +set hive.fetch.task.conversion = none; + +-- First, create source tables +DROP TABLE IF EXISTS dummy; +CREATE TABLE dummy (i int); +INSERT INTO TABLE dummy VALUES (42); + +DROP TABLE IF EXISTS nested_tbl_1; +CREATE TABLE nested_tbl_1 ( + a int, + s1 struct, f6: int>, + s2 struct, f11: map>>, + s3 struct>> +) STORED AS PARQUET; + +INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))) +FROM dummy; + +DROP TABLE IF EXISTS nested_tbl_2; +CREATE TABLE nested_tbl_2 LIKE nested_tbl_1; + +INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))) +FROM dummy; + +-- Testing only select statements + +EXPLAIN SELECT a FROM nested_tbl_1; +SELECT a FROM nested_tbl_1; + +EXPLAIN SELECT s1.f1 FROM nested_tbl_1; +SELECT s1.f1 FROM nested_tbl_1; + +EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1; +SELECT s1.f1, s1.f2 FROM nested_tbl_1; + +-- In this case 's1.f3' and 's1.f3.f4' should be merged +EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; + +-- Testing select array and index shifting +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1; +SELECT s1.f3.f5 FROM nested_tbl_1; + +-- Testing select from multiple structs +EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; + + +-- Testing select with filter + +EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; + +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; + +EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; + + +-- Testing lateral view + +EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; + + +-- Testing UDFs +EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; + + +-- Testing aggregations + +EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; + + +-- Testing joins + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; http://git-wip-us.apache.org/repos/asf/hive/blob/a625bb03/ql/src/test/results/clientpositive/nested_column_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out new file mode 100644 index 0000000..5b6a8a2 --- /dev/null +++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out @@ -0,0 +1,1072 @@ +PREHOOK: query: -- First, create source tables +DROP TABLE IF EXISTS dummy +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- First, create source tables +DROP TABLE IF EXISTS dummy +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE dummy (i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: CREATE TABLE dummy (i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: INSERT INTO TABLE dummy VALUES (42) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@dummy +POSTHOOK: query: INSERT INTO TABLE dummy VALUES (42) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct, f6: int>, + s2 struct, f11: map>>, + s3 struct>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct, f6: int>, + s2 struct, f11: map>>, + s3 struct>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_1 +PREHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))) +FROM dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_1 +POSTHOOK: Lineage: nested_tbl_1.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_1.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s3 EXPRESSION [] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_2 +PREHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))) +FROM dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_2 +POSTHOOK: Lineage: nested_tbl_2.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_2.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s3 EXPRESSION [] +PREHOOK: query: -- Testing only select statements + +EXPLAIN SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing only select statements + +EXPLAIN SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 +PREHOOK: query: EXPLAIN SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false +PREHOOK: query: EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f1 (type: boolean), s1.f2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false foo +PREHOOK: query: -- In this case 's1.f3' and 's1.f3.f4' should be merged +EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: -- In this case 's1.f3' and 's1.f3.f4' should be merged +EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 4 +PREHOOK: query: -- Testing select array and index shifting +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing select array and index shifting +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: -- Testing select from multiple structs +EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing select from multiple structs +EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f4 (type: int), s2.f8.f9 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +4 true +PREHOOK: query: -- Testing select with filter + +EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing select with filter + +EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f1 = false) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f2 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +foo +PREHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f3.f4 = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and (s2.f8.f11['key1'] = true)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2.f8 (type: struct,f11:map>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}} +PREHOOK: query: -- Testing lateral view + +EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing lateral view + +EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s3 (type: struct>>) + outputColumnNames: s3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col3, _col7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int) + outputColumnNames: _col7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col7, _col8 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col8 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col7, _col8 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col8 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: s2.f8.f10 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col3, _col7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int) + outputColumnNames: _col7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col7, _col8 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col8 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col7, _col8 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col8 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +10 {"f13":"foo","f14":14} +10 {"f13":"bar","f14":28} +11 {"f13":"foo","f14":14} +11 {"f13":"bar","f14":28} +PREHOOK: query: -- Testing UDFs +EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing UDFs +EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (s2.f8.f10[1] pmod s1.f3.f4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +3 +PREHOOK: query: -- Testing aggregations + +EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing aggregations + +EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: s1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f3.f4) + keys: s1.f3.f5 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 1 +PREHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: s1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f3.f4) + keys: s1.f3 (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: s1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f3.f4) + keys: s1.f3 (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: -- Testing joins + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +POSTHOOK: query: -- Testing joins + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s1.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: s1.f3.f4 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: s1 (type: struct,f6:int>) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and (s2.f8.f9 = false)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s1.f6 (type: int) + sort order: + + Map-reduce partition columns: s1.f6 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: s2 (type: struct,f11:map>>) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 s1.f3.f4 (type: int) + 1 s1.f6 (type: int) + outputColumnNames: _col1, _col9 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1.f3.f5 (type: double), _col9.f8 (type: struct,f11:map>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +5.0 {"f9":false,"f10":[20,22],"f11":{"key3":true,"key4":false}} +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s1.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: s1.f3.f4 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: s1 (type: struct,f6:int>) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and (s2.f8.f9 = true)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s1.f6 (type: int) + sort order: + + Map-reduce partition columns: s1.f6 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: s2 (type: struct,f11:map>>) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 s1.f3.f4 (type: int) + 1 s1.f6 (type: int) + outputColumnNames: _col1, _col9 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1.f3.f5 (type: double), _col9.f8 (type: struct,f11:map>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 {"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}} http://git-wip-us.apache.org/repos/asf/hive/blob/a625bb03/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java index 3978a15..1354680 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.serde2; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; @@ -140,8 +143,6 @@ public final class ColumnProjectionUtils { newConfStr = newConfStr + StringUtils.COMMA_STR + old; } setReadNestedColumnPathConf(conf, newConfStr); - // Set READ_ALL_COLUMNS to false - conf.setBoolean(READ_ALL_COLUMNS, false); } @@ -194,18 +195,10 @@ public final class ColumnProjectionUtils { return result; } - public static List getNestedColumnPaths(Configuration conf) { + public static Set getNestedColumnPaths(Configuration conf) { String skips = conf.get(READ_NESTED_COLUMN_PATH_CONF_STR, READ_NESTED_COLUMN_PATH_CONF_STR_DEFAULT); - String[] list = StringUtils.split(skips); - List result = new ArrayList<>(list.length); - for (String element : list) { - // it may contain duplicates, remove duplicates - if (!result.contains(element)) { - result.add(element); - } - } - return result; + return new HashSet<>(Arrays.asList(StringUtils.split(skips))); } public static String[] getReadColumnNames(Configuration conf) { @@ -227,6 +220,7 @@ public final class ColumnProjectionUtils { private static void setReadNestedColumnPathConf( Configuration conf, String nestedColumnPaths) { + nestedColumnPaths = nestedColumnPaths.toLowerCase(); if (nestedColumnPaths.trim().isEmpty()) { conf.set(READ_NESTED_COLUMN_PATH_CONF_STR, READ_NESTED_COLUMN_PATH_CONF_STR_DEFAULT); } else {