Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2E2D219A11 for ; Tue, 29 Mar 2016 08:56:17 +0000 (UTC) Received: (qmail 48266 invoked by uid 500); 29 Mar 2016 08:56:17 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 48213 invoked by uid 500); 29 Mar 2016 08:56:17 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 48202 invoked by uid 99); 29 Mar 2016 08:56:17 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 29 Mar 2016 08:56:16 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id C771BDFC6E; Tue, 29 Mar 2016 08:56:16 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mmccline@apache.org To: commits@hive.apache.org Message-Id: <91d10464705740809c932f92da53e4e8@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-10729: Query failed when select complex columns from joinned table (tez map join only) (Matt McCline, reviewed by Sergey Shelukhin) Date: Tue, 29 Mar 2016 08:56:16 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master 44ab45534 -> ff10f0337 HIVE-10729: Query failed when select complex columns from joinned table (tez map join only) (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff10f033 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff10f033 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff10f033 Branch: refs/heads/master Commit: ff10f03371f5ff54d34a28938c5d6e69940113ea Parents: 44ab455 Author: Matt McCline Authored: Tue Mar 29 01:52:48 2016 -0700 Committer: Matt McCline Committed: Tue Mar 29 01:54:51 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../ql/exec/vector/VectorizationContext.java | 14 +- .../mapjoin/VectorMapJoinCommonOperator.java | 2 +- .../hive/ql/optimizer/physical/Vectorizer.java | 7 + .../ql/optimizer/physical/TestVectorizer.java | 5 + .../clientpositive/vector_complex_join.q | 29 +++ .../tez/vector_complex_join.q.out | 227 +++++++++++++++++++ .../clientpositive/vector_complex_join.q.out | 225 ++++++++++++++++++ 8 files changed, 502 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 0672e0e..ed26dea 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -258,6 +258,7 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_coalesce.q,\ vector_coalesce_2.q,\ vector_complex_all.q,\ + vector_complex_join.q,\ vector_count_distinct.q,\ vector_data_types.q,\ vector_date_1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 0552f9d..1eb960d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -342,7 +342,7 @@ public class VectorizationContext { private final Set usedOutputColumns = new HashSet(); - int allocateOutputColumn(String hiveTypeName) { + int allocateOutputColumn(String hiveTypeName) throws HiveException { if (initialOutputCol < 0) { // This is a test return 0; @@ -403,7 +403,7 @@ public class VectorizationContext { } } - public int allocateScratchColumn(String hiveTypeName) { + public int allocateScratchColumn(String hiveTypeName) throws HiveException { return ocm.allocateOutputColumn(hiveTypeName); } @@ -2243,7 +2243,7 @@ public class VectorizationContext { } } - static String getNormalizedName(String hiveTypeName) { + static String getNormalizedName(String hiveTypeName) throws HiveException { VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); switch (argType) { case INT_FAMILY: @@ -2269,11 +2269,11 @@ public class VectorizationContext { case INTERVAL_DAY_TIME: return hiveTypeName; default: - return "None"; + throw new HiveException("Unexpected hive type name " + hiveTypeName); } } - static String getUndecoratedName(String hiveTypeName) { + static String getUndecoratedName(String hiveTypeName) throws HiveException { VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); switch (argType) { case INT_FAMILY: @@ -2296,7 +2296,7 @@ public class VectorizationContext { case INTERVAL_DAY_TIME: return hiveTypeName; default: - return "None"; + throw new HiveException("Unexpected hive type name " + hiveTypeName); } } @@ -2511,7 +2511,7 @@ public class VectorizationContext { } sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", "); - sb.append("scratchColumnTypeNames ").append(getScratchColumnTypeNames().toString()); + sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames())); return sb.toString(); } http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index e26e31b..8ad7ca4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -255,7 +255,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem determineCommonInfo(isOuterJoin); } - protected void determineCommonInfo(boolean isOuter) { + protected void determineCommonInfo(boolean isOuter) throws HiveException { bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping"); http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f674ece..d806b97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1362,6 +1362,13 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("Cannot vectorize map work value expression"); return false; } + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (!validateExprNodeDesc(smallTableExprs)) { + LOG.info("Cannot vectorize map work small table expression"); + return false; + } return true; } http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 5628959..9d4ca76 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -158,8 +158,13 @@ public class TestVectorizer { expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false)); Map> keyMap = new HashMap>(); keyMap.put((byte)0, expr); + List smallTableExpr = new ArrayList(); + smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false)); + keyMap.put((byte)1, smallTableExpr); mjdesc.setKeys(keyMap); mjdesc.setExprs(keyMap); + Byte[] order = new Byte[] {(byte) 0, (byte) 1}; + mjdesc.setTagOrder(order); //Set filter expression GenericUDFOPEqual udf = new GenericUDFOPEqual(); http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/queries/clientpositive/vector_complex_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q new file mode 100644 index 0000000..30f38b1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_complex_join.q @@ -0,0 +1,29 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +SET hive.vectorized.execution.enabled=true; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; + +-- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC; +INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1; + +explain +select * from alltypesorc join test where alltypesorc.cint=test.a; + +select * from alltypesorc join test where alltypesorc.cint=test.a; + + + +CREATE TABLE test2a (a ARRAY) STORED AS ORC; +INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1; + +CREATE TABLE test2b (a INT) STORED AS ORC; +INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4); + +explain +select * from test2b join test2a on test2b.a = test2a.a[1]; + +select * from test2b join test2a on test2b.a = test2a.a[1]; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out new file mode 100644 index 0000000..dc988ef --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out @@ -0,0 +1,227 @@ +PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.a SIMPLE [] +POSTHOOK: Lineage: test.b EXPRESSION [] +c0 c1 +PREHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + input vertices: + 1 Map 2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: map) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: map) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@test +#### A masked pattern was here #### +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"} +PREHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2a +POSTHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2a +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test2a +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test2a +POSTHOOK: Lineage: test2a.a EXPRESSION [] +c0 +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2b +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2b +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test2b +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test2b +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 +PREHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test2b + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 a (type: int) + 1 a[1] (type: int) + outputColumnNames: _col0, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col4 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: test2a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a[1] is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a[1] (type: int) + sort order: + + Map-reduce partition columns: a[1] (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: a (type: array) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +PREHOOK: Input: default@test2a +PREHOOK: Input: default@test2b +#### A masked pattern was here #### +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2a +POSTHOOK: Input: default@test2b +#### A masked pattern was here #### +test2b.a test2a.a +2 [1,2] http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/vector_complex_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out new file mode 100644 index 0000000..002cdeb --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.a SIMPLE [] +POSTHOOK: Lineage: test.b EXPRESSION [] +c0 c1 +PREHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test + TableScan + alias: test + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: map) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@test +#### A masked pattern was here #### +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"} +PREHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2a +POSTHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2a +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test2a +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test2a +POSTHOOK: Lineage: test2a.a EXPRESSION [] +c0 +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2b +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2b +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test2b +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test2b +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 +PREHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + test2b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + test2b + TableScan + alias: test2b + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 a (type: int) + 1 a[1] (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a[1] is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 a (type: int) + 1 a[1] (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col4 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +PREHOOK: Input: default@test2a +PREHOOK: Input: default@test2b +#### A masked pattern was here #### +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2a +POSTHOOK: Input: default@test2b +#### A masked pattern was here #### +test2b.a test2a.a +2 [1,2]