hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-12584: Vectorized join with partition column of type char does not trim spaces (Prasanth Jayachandran reviewed by Matt McCline)
Date Fri, 04 Dec 2015 20:16:32 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 09e7b19bf -> fd378c3e7


HIVE-12584: Vectorized join with partition column of type char does not trim spaces (Prasanth
Jayachandran reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fd378c3e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fd378c3e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fd378c3e

Branch: refs/heads/branch-1
Commit: fd378c3e71aadaeac86d9a0e3611e9abcf60178d
Parents: 09e7b19
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Fri Dec 4 14:16:16 2015 -0600
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Fri Dec 4 14:16:16 2015 -0600

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  13 +-
 .../clientpositive/vector_join_part_col_char.q  |  27 +++
 .../tez/vector_join_part_col_char.q.out         | 197 ++++++++++++++++++
 .../vector_join_part_col_char.q.out             | 198 +++++++++++++++++++
 5 files changed, 433 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fd378c3e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 8421106..ddff514 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -365,6 +365,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   tez_smb_main.q,\
   tez_smb_1.q,\
   tez_smb_empty.q,\
+  vector_join_part_col_char.q,\
   vectorized_dynamic_partition_pruning.q,\
   tez_multi_union.q,\
   tez_join.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/fd378c3e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 82d4a8f..81ab129 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -34,6 +34,7 @@ import java.util.regex.Pattern;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
@@ -57,7 +58,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.Pr
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -226,14 +229,18 @@ public class VectorizedRowBatchCtx {
           partitionTypes.put(key, PrimitiveCategory.STRING);       
         } else {
           // Create a Standard java object Inspector
+          PrimitiveTypeInfo partColTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]);
           objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
-              TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+              partColTypeInfo);
           objectVal = 
               ObjectInspectorConverters.
               getConverter(PrimitiveObjectInspectorFactory.
                   javaStringObjectInspector, objectInspector).
-                  convert(partSpec.get(key));              
-          partitionTypes.put(key, TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]).getPrimitiveCategory());
+                  convert(partSpec.get(key));
+          if (partColTypeInfo instanceof CharTypeInfo) {
+            objectVal = ((HiveChar) objectVal).getStrippedValue();
+          }
+          partitionTypes.put(key, partColTypeInfo.getPrimitiveCategory());
         }
         if (LOG.isDebugEnabled()) {
           LOG.debug("Partition column: name: " + key + ", value: " + objectVal + ", type: " + partitionTypes.get(key));

http://git-wip-us.apache.org/repos/asf/hive/blob/fd378c3e/ql/src/test/queries/clientpositive/vector_join_part_col_char.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_part_col_char.q b/ql/src/test/queries/clientpositive/vector_join_part_col_char.q
new file mode 100644
index 0000000..45a9165
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_part_col_char.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists char_part_tbl1 ;
+drop table if exists char_part_tbl2;
+
+create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true');
+insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50);
+
+create table char_tbl1(name string, age int) partitioned  by(gpa char(50)) stored as orc;
+create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc;
+
+insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5;
+insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5;
+insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5;
+insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3;
+
+show partitions char_tbl1;
+show partitions char_tbl2;
+
+explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
+select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
+
+set hive.vectorized.execution.enabled=false;
+select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);

http://git-wip-us.apache.org/repos/asf/hive/blob/fd378c3e/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
new file mode 100644
index 0000000..f3d5931
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
@@ -0,0 +1,197 @@
+PREHOOK: query: drop table if exists char_part_tbl1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_part_tbl1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_part_tbl2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_part_tbl2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age)
into 2 buckets stored as orc tblproperties('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@studenttab
+POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age)
into 2 buckets stored as orc tblproperties('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@studenttab
+PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00),
('(yuri xylophone',30,2.74),('alice underhill',46,3.50)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@studenttab
+POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00),
('(yuri xylophone',30,2.74),('alice underhill',46,3.50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@studenttab
+POSTHOOK: Lineage: studenttab.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: studenttab.gpa EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: studenttab.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+PREHOOK: query: create table char_tbl1(name string, age int) partitioned  by(gpa char(50))
stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_tbl1
+POSTHOOK: query: create table char_tbl1(name string, age int) partitioned  by(gpa char(50))
stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_tbl1
+PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5))
stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_tbl2
+POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5))
stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_tbl2
+PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl1@gpa=3.5                                              

+POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl1@gpa=3.5                                             
 
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5                                          
    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5                                          
    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null),
]
+PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab
where gpa = 2.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl1@gpa=2.5                                              

+POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab
where gpa = 2.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl1@gpa=2.5                                             
 
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5                                          
    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5                                          
    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null),
]
+PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl2@gpa=3.5  
+POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl2@gpa=3.5  
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5  ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age,
type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5  ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name,
type:string, comment:null), ]
+PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
where gpa = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl2@gpa=3    
+POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
where gpa = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl2@gpa=3    
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age,
type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name,
type:string, comment:null), ]
+PREHOOK: query: show partitions char_tbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@char_tbl1
+POSTHOOK: query: show partitions char_tbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@char_tbl1
+gpa=2.5
+gpa=3.5
+PREHOOK: query: show partitions char_tbl2
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@char_tbl2
+POSTHOOK: query: show partitions char_tbl2
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@char_tbl2
+gpa=3
+gpa=3.5
+PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1
c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1
c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+   Fetch Operator
+      limit:-1
+      Stage-1
+         Reducer 2
+         File Output Operator [FS_10]
+            compressed:false
+            Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+            table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+            Merge Join Operator [MERGEJOIN_21]
+            |  condition map:[{"":"Inner Join 0 to 1"}]
+            |  keys:{"0":"_col2 (type: char(50))","1":"_col2 (type: char(50))"}
+            |  outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
+            |  Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats:
NONE
+            |<-Map 1 [SIMPLE_EDGE] vectorized
+            |  Reduce Output Operator [RS_23]
+            |     key expressions:_col2 (type: char(50))
+            |     Map-reduce partition columns:_col2 (type: char(50))
+            |     sort order:+
+            |     Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+            |     value expressions:_col0 (type: string), _col1 (type: int)
+            |     Select Operator [OP_22]
+            |        outputColumnNames:["_col0","_col1","_col2"]
+            |        Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+            |        TableScan [TS_0]
+            |           alias:c1
+            |           Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column
stats: NONE
+            |  Dynamic Partitioning Event Operator [EVENT_20]
+            |     Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+            |     Group By Operator [OP_25]
+            |        keys:_col0 (type: char(50))
+            |        outputColumnNames:["_col0"]
+            |        Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+            |        Select Operator [OP_24]
+            |           outputColumnNames:["_col0"]
+            |           Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column
stats: NONE
+            |            Please refer to the previous Select Operator [OP_22]
+            |<-Map 3 [SIMPLE_EDGE] vectorized
+               Reduce Output Operator [RS_27]
+                  key expressions:_col2 (type: char(50))
+                  Map-reduce partition columns:_col2 (type: char(50))
+                  sort order:+
+                  Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions:_col0 (type: string), _col1 (type: int), _col2 (type:
char(5))
+                  Select Operator [OP_26]
+                     outputColumnNames:["_col0","_col1","_col2"]
+                     Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats:
NONE
+                     TableScan [TS_2]
+                        alias:c2
+                        Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column
stats: NONE
+
+PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_tbl1
+PREHOOK: Input: default@char_tbl1@gpa=2.5
+PREHOOK: Input: default@char_tbl1@gpa=3.5
+PREHOOK: Input: default@char_tbl2
+PREHOOK: Input: default@char_tbl2@gpa=3
+PREHOOK: Input: default@char_tbl2@gpa=3.5
+#### A masked pattern was here ####
+POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_tbl1
+POSTHOOK: Input: default@char_tbl1@gpa=2.5
+POSTHOOK: Input: default@char_tbl1@gpa=3.5
+POSTHOOK: Input: default@char_tbl2
+POSTHOOK: Input: default@char_tbl2@gpa=3
+POSTHOOK: Input: default@char_tbl2@gpa=3.5
+#### A masked pattern was here ####
+alice underhill	46	3.5                                               	alice underhill	46
3.5  
+PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_tbl1
+PREHOOK: Input: default@char_tbl1@gpa=2.5
+PREHOOK: Input: default@char_tbl1@gpa=3.5
+PREHOOK: Input: default@char_tbl2
+PREHOOK: Input: default@char_tbl2@gpa=3
+PREHOOK: Input: default@char_tbl2@gpa=3.5
+#### A masked pattern was here ####
+POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_tbl1
+POSTHOOK: Input: default@char_tbl1@gpa=2.5
+POSTHOOK: Input: default@char_tbl1@gpa=3.5
+POSTHOOK: Input: default@char_tbl2
+POSTHOOK: Input: default@char_tbl2@gpa=3
+POSTHOOK: Input: default@char_tbl2@gpa=3.5
+#### A masked pattern was here ####
+alice underhill	46	3.5                                               	alice underhill	46
3.5  

http://git-wip-us.apache.org/repos/asf/hive/blob/fd378c3e/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out
new file mode 100644
index 0000000..b6631f7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out
@@ -0,0 +1,198 @@
+PREHOOK: query: drop table if exists char_part_tbl1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_part_tbl1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_part_tbl2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_part_tbl2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age)
into 2 buckets stored as orc tblproperties('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@studenttab
+POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age)
into 2 buckets stored as orc tblproperties('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@studenttab
+PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00),
('(yuri xylophone',30,2.74),('alice underhill',46,3.50)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@studenttab
+POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00),
('(yuri xylophone',30,2.74),('alice underhill',46,3.50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@studenttab
+POSTHOOK: Lineage: studenttab.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: studenttab.gpa EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: studenttab.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+PREHOOK: query: create table char_tbl1(name string, age int) partitioned  by(gpa char(50))
stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_tbl1
+POSTHOOK: query: create table char_tbl1(name string, age int) partitioned  by(gpa char(50))
stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_tbl1
+PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5))
stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_tbl2
+POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5))
stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_tbl2
+PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl1@gpa=3.5                                              

+POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl1@gpa=3.5                                             
 
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5                                          
    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5                                          
    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null),
]
+PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab
where gpa = 2.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl1@gpa=2.5                                              

+POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab
where gpa = 2.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl1@gpa=2.5                                             
 
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5                                          
    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5                                          
    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null),
]
+PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl2@gpa=3.5  
+POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab
where gpa = 3.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl2@gpa=3.5  
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5  ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age,
type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5  ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name,
type:string, comment:null), ]
+PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
where gpa = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab
+PREHOOK: Output: default@char_tbl2@gpa=3    
+POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
where gpa = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab
+POSTHOOK: Output: default@char_tbl2@gpa=3    
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3    ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age,
type:int, comment:null), ]
+POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3    ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name,
type:string, comment:null), ]
+PREHOOK: query: show partitions char_tbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@char_tbl1
+POSTHOOK: query: show partitions char_tbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@char_tbl1
+gpa=2.5                                               
+gpa=3.5                                               
+PREHOOK: query: show partitions char_tbl2
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@char_tbl2
+POSTHOOK: query: show partitions char_tbl2
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@char_tbl2
+gpa=3    
+gpa=3.5  
+PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1
c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1
c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: c1
+            Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: name (type: string), age (type: int), gpa (type: char(50))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: char(50))
+                sort order: +
+                Map-reduce partition columns: _col2 (type: char(50))
+                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: string), _col1 (type: int)
+          TableScan
+            alias: c2
+            Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: name (type: string), age (type: int), gpa (type: char(5))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats:
NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: char(50))
+                sort order: +
+                Map-reduce partition columns: _col2 (type: char(50))
+                Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: string), _col1 (type: int), _col2 (type:
char(5))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col2 (type: char(50))
+            1 _col2 (type: char(50))
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_tbl1
+PREHOOK: Input: default@char_tbl1@gpa=2.5                                               
+PREHOOK: Input: default@char_tbl1@gpa=3.5                                               
+PREHOOK: Input: default@char_tbl2
+PREHOOK: Input: default@char_tbl2@gpa=3    
+PREHOOK: Input: default@char_tbl2@gpa=3.5  
+#### A masked pattern was here ####
+POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_tbl1
+POSTHOOK: Input: default@char_tbl1@gpa=2.5                                              

+POSTHOOK: Input: default@char_tbl1@gpa=3.5                                              

+POSTHOOK: Input: default@char_tbl2
+POSTHOOK: Input: default@char_tbl2@gpa=3    
+POSTHOOK: Input: default@char_tbl2@gpa=3.5  
+#### A masked pattern was here ####
+alice underhill	46	3.5                                               	alice underhill	46
3.5  
+PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_tbl1
+PREHOOK: Input: default@char_tbl1@gpa=2.5                                               
+PREHOOK: Input: default@char_tbl1@gpa=3.5                                               
+PREHOOK: Input: default@char_tbl2
+PREHOOK: Input: default@char_tbl2@gpa=3    
+PREHOOK: Input: default@char_tbl2@gpa=3.5  
+#### A masked pattern was here ####
+POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1
join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_tbl1
+POSTHOOK: Input: default@char_tbl1@gpa=2.5                                              

+POSTHOOK: Input: default@char_tbl1@gpa=3.5                                              

+POSTHOOK: Input: default@char_tbl2
+POSTHOOK: Input: default@char_tbl2@gpa=3    
+POSTHOOK: Input: default@char_tbl2@gpa=3.5  
+#### A masked pattern was here ####
+alice underhill	46	3.5                                               	alice underhill	46
3.5  


Mime
View raw message