hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject hive git commit: HIVE-10729: Query failed when select complex columns from joinned table (tez map join only) (Matt McCline, reviewed by Sergey Shelukhin)
Date Tue, 29 Mar 2016 08:56:16 GMT
Repository: hive
Updated Branches:
  refs/heads/master 44ab45534 -> ff10f0337


HIVE-10729: Query failed when select complex columns from joinned table (tez map join only)
(Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff10f033
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff10f033
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff10f033

Branch: refs/heads/master
Commit: ff10f03371f5ff54d34a28938c5d6e69940113ea
Parents: 44ab455
Author: Matt McCline <mmccline@hortonworks.com>
Authored: Tue Mar 29 01:52:48 2016 -0700
Committer: Matt McCline <mmccline@hortonworks.com>
Committed: Tue Mar 29 01:54:51 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../ql/exec/vector/VectorizationContext.java    |  14 +-
 .../mapjoin/VectorMapJoinCommonOperator.java    |   2 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |   7 +
 .../ql/optimizer/physical/TestVectorizer.java   |   5 +
 .../clientpositive/vector_complex_join.q        |  29 +++
 .../tez/vector_complex_join.q.out               | 227 +++++++++++++++++++
 .../clientpositive/vector_complex_join.q.out    | 225 ++++++++++++++++++
 8 files changed, 502 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0672e0e..ed26dea 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -258,6 +258,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   vector_coalesce.q,\
   vector_coalesce_2.q,\
   vector_complex_all.q,\
+  vector_complex_join.q,\
   vector_count_distinct.q,\
   vector_data_types.q,\
   vector_date_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 0552f9d..1eb960d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -342,7 +342,7 @@ public class VectorizationContext {
 
     private final Set<Integer> usedOutputColumns = new HashSet<Integer>();
 
-    int allocateOutputColumn(String hiveTypeName) {
+    int allocateOutputColumn(String hiveTypeName) throws HiveException {
         if (initialOutputCol < 0) {
           // This is a test
           return 0;
@@ -403,7 +403,7 @@ public class VectorizationContext {
     }
   }
 
-  public int allocateScratchColumn(String hiveTypeName) {
+  public int allocateScratchColumn(String hiveTypeName) throws HiveException {
     return ocm.allocateOutputColumn(hiveTypeName);
   }
 
@@ -2243,7 +2243,7 @@ public class VectorizationContext {
     }
   }
 
-  static String getNormalizedName(String hiveTypeName) {
+  static String getNormalizedName(String hiveTypeName) throws HiveException {
     VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
     switch (argType) {
     case INT_FAMILY:
@@ -2269,11 +2269,11 @@ public class VectorizationContext {
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
     default:
-      return "None";
+      throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }
   }
 
-  static String getUndecoratedName(String hiveTypeName) {
+  static String getUndecoratedName(String hiveTypeName) throws HiveException {
     VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
     switch (argType) {
     case INT_FAMILY:
@@ -2296,7 +2296,7 @@ public class VectorizationContext {
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
     default:
-      return "None";
+      throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }
   }
 
@@ -2511,7 +2511,7 @@ public class VectorizationContext {
     }
     sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", ");
 
-    sb.append("scratchColumnTypeNames ").append(getScratchColumnTypeNames().toString());
+    sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames()));
 
     return sb.toString();
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index e26e31b..8ad7ca4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -255,7 +255,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
     determineCommonInfo(isOuterJoin);
   }
 
-  protected void determineCommonInfo(boolean isOuter) {
+  protected void determineCommonInfo(boolean isOuter) throws HiveException {
 
     bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index f674ece..d806b97 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1362,6 +1362,13 @@ public class Vectorizer implements PhysicalPlanResolver {
       LOG.info("Cannot vectorize map work value expression");
       return false;
     }
+    Byte[] order = desc.getTagOrder();
+    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
+    if (!validateExprNodeDesc(smallTableExprs)) {
+      LOG.info("Cannot vectorize map work small table expression");
+      return false;
+    }
     return true;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
index 5628959..9d4ca76 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
@@ -158,8 +158,13 @@ public class TestVectorizer {
       expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
       Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
       keyMap.put((byte)0, expr);
+      List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
+      smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
+      keyMap.put((byte)1, smallTableExpr);
       mjdesc.setKeys(keyMap);
       mjdesc.setExprs(keyMap);
+      Byte[] order = new Byte[] {(byte) 0, (byte) 1};
+      mjdesc.setTagOrder(order);
 
       //Set filter expression
       GenericUDFOPEqual udf = new GenericUDFOPEqual();

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/queries/clientpositive/vector_complex_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q
new file mode 100644
index 0000000..30f38b1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_complex_join.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
+
+-- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
+INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;
+
+explain
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+
+
+CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC;
+INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1;
+
+CREATE TABLE test2b (a INT) STORED AS ORC;
+INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);
+
+explain
+select *  from test2b join test2a on test2b.a = test2a.a[1];
+
+select *  from test2b join test2a on test2b.a = test2a.a[1];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
new file mode 100644
index 0000000..dc988ef
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
@@ -0,0 +1,227 @@
+PREHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.a SIMPLE []
+POSTHOOK: Lineage: test.b EXPRESSION []
+c0	c1
+PREHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: cint is not null (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE
Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint),
cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1
(type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type:
timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE
Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col2 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9, _col10, _col11, _col12, _col13
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE
Column stats: NONE
+                        HybridGraceHashJoin: true
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE
Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats:
NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: map<int,string>)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column
stats: NONE
+                        value expressions: _col1 (type: map<int,string>)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+alltypesorc.ctinyint	alltypesorc.csmallint	alltypesorc.cint	alltypesorc.cbigint	alltypesorc.cfloat
alltypesorc.cdouble	alltypesorc.cstring1	alltypesorc.cstring2	alltypesorc.ctimestamp1	alltypesorc.ctimestamp2
alltypesorc.cboolean1	alltypesorc.cboolean2	test.a	test.b
+-51	NULL	199408978	-1800989684	-51.0	NULL	34N4EY63M1GFWuW0boW	P4PL5h1eXR4mMLr2	1969-12-31
16:00:08.451	NULL	false	true	199408978	{1:"val_1",2:"val_2"}
+PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2a
+POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2a
+PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test2a
+POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test2a
+POSTHOOK: Lineage: test2a.a EXPRESSION []
+c0
+PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2b
+POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2b
+PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test2b
+POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test2b
+POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+_col0
+PREHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test2b
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 a (type: int)
+                        1 a[1] (type: int)
+                      outputColumnNames: _col0, _col4
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column
stats: NONE
+                      HybridGraceHashJoin: true
+                      Select Operator
+                        expressions: _col0 (type: int), _col4 (type: array<int>)
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column
stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column
stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test2a
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: a[1] is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      key expressions: a[1] (type: int)
+                      sort order: +
+                      Map-reduce partition columns: a[1] (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                      value expressions: a (type: array<int>)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2a
+PREHOOK: Input: default@test2b
+#### A masked pattern was here ####
+POSTHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2a
+POSTHOOK: Input: default@test2b
+#### A masked pattern was here ####
+test2b.a	test2a.a
+2	[1,2]

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
new file mode 100644
index 0000000..002cdeb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out
@@ -0,0 +1,225 @@
+PREHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.a SIMPLE []
+POSTHOOK: Lineage: test.b EXPRESSION []
+c0	c1
+PREHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:test 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:test 
+          TableScan
+            alias: test
+            Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats:
NONE
+              Select Operator
+                expressions: a (type: int), b (type: map<int,string>)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats:
NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col2 (type: int)
+                    1 _col0 (type: int)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: cint is not null (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type:
int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type:
string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp),
cboolean1 (type: boolean), cboolean2 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col2 (type: int)
+                    1 _col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12, _col13
+                  Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column
stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE
Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+alltypesorc.ctinyint	alltypesorc.csmallint	alltypesorc.cint	alltypesorc.cbigint	alltypesorc.cfloat
alltypesorc.cdouble	alltypesorc.cstring1	alltypesorc.cstring2	alltypesorc.ctimestamp1	alltypesorc.ctimestamp2
alltypesorc.cboolean1	alltypesorc.cboolean2	test.a	test.b
+-51	NULL	199408978	-1800989684	-51.0	NULL	34N4EY63M1GFWuW0boW	P4PL5h1eXR4mMLr2	1969-12-31
16:00:08.451	NULL	false	true	199408978	{1:"val_1",2:"val_2"}
+PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2a
+POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2a
+PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test2a
+POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test2a
+POSTHOOK: Lineage: test2a.a EXPRESSION []
+c0
+PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2b
+POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2b
+PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test2b
+POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test2b
+POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+_col0
+PREHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        test2b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        test2b 
+          TableScan
+            alias: test2b
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a is not null (type: boolean)
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 a (type: int)
+                  1 a[1] (type: int)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test2a
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a[1] is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 a (type: int)
+                  1 a[1] (type: int)
+                outputColumnNames: _col0, _col4
+                Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col4 (type: array<int>)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats:
NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats:
NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2a
+PREHOOK: Input: default@test2b
+#### A masked pattern was here ####
+POSTHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2a
+POSTHOOK: Input: default@test2b
+#### A masked pattern was here ####
+test2b.a	test2a.a
+2	[1,2]


Mime
View raw message