hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject svn commit: r1484623 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/queries/clientpositive/ test/results/clientpositive/
Date Mon, 20 May 2013 23:36:38 GMT
Author: omalley
Date: Mon May 20 23:36:37 2013
New Revision: 1484623

URL: http://svn.apache.org/r1484623
Log:
HIVE-4521 Auto join conversion fails in certain cases (Gunther Hagleitner via
omalley)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q
    hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Mon May 20 23:36:37 2013
@@ -303,7 +303,7 @@ abstract public class AbstractBucketJoin
             // The number of files for the table should be same as number of buckets.
             int bucketCount = p.getBucketCount();
 
-            if (fileNames.size() != bucketCount) {
+            if (fileNames.size() != 0 && fileNames.size() != bucketCount) {
               String msg = "The number of buckets for table " +
                   tbl.getTableName() + " partition " + p.getName() + " is " +
                   p.getBucketCount() + ", whereas the number of files is " + fileNames.size();
@@ -333,7 +333,7 @@ abstract public class AbstractBucketJoin
         Integer num = new Integer(tbl.getNumBuckets());
 
         // The number of files for the table should be same as number of buckets.
-        if (fileNames.size() != num) {
+        if (fileNames.size() != 0 && fileNames.size() != num) {
           String msg = "The number of buckets for table " +
               tbl.getTableName() + " is " + tbl.getNumBuckets() +
               ", whereas the number of files is " + fileNames.size();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013
@@ -48,7 +48,7 @@ public class AvgPartitionSizeBasedBigTab
   public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp)
     throws SemanticException {
     int bigTablePos = 0;
-    long maxSize = 0;
+    long maxSize = -1;
     int numPartitionsCurrentBigTable = 0; // number of partitions for the chosen big table
     HiveConf conf = parseCtx.getConf();
 
@@ -79,7 +79,7 @@ public class AvgPartitionSizeBasedBigTab
           for (Partition part : partsList.getNotDeniedPartns()) {
             totalSize += getSize(conf, part);
           }
-          averageSize = totalSize/numPartitions;
+          averageSize = numPartitions == 0 ? 0 : totalSize/numPartitions;
         }
 
         if (averageSize > maxSize) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013
@@ -41,7 +41,7 @@ implements BigTableSelectorForAutoSMJ {
   public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp)
     throws SemanticException {
     int bigTablePos = 0;
-    long maxSize = 0;
+    long maxSize = -1;
     HiveConf conf = parseCtx.getConf();
 
     try {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Mon May 20 23:36:37 2013
@@ -466,7 +466,7 @@ public class CommonJoinTaskDispatcher ex
             HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
 
         boolean bigTableFound = false;
-        long largestBigTableCandidateSize = 0;
+        long largestBigTableCandidateSize = -1;
         long sumTableSizes = 0;
         for (String alias : aliasToWork.keySet()) {
           int tablePosition = getPosition(currWork, joinOp, alias);

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q?rev=1484623&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q Mon May 20 23:36:37 2013
@@ -0,0 +1,78 @@
+set hive.auto.convert.join=true;
+
+-- empty tables
+create table studenttab10k (name string, age int, gpa double);
+create table votertab10k (name string, age int, registration string, contributions float);
+
+explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge=true;
+set hive.auto.convert.sortmerge.join=true;
+
+-- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets;
+create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets;
+
+explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb;
+load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb;
+load data local inpath '../data/files/empty1.txt' into table votertab10k_smb;
+load data local inpath '../data/files/empty2.txt' into table votertab10k_smb;
+
+explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+-- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
+create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
+
+load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo');
+load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo');
+load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo');
+load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo');
+
+explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name;
+
+drop table studenttab10k;
+drop table votertab10k;
+drop table studenttab10k_smb;
+drop table votertab10k_smb;
+drop table studenttab10k_part;
+drop table votertab10k_part;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out?rev=1484623&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out Mon May 20 23:36:37 2013
@@ -0,0 +1,633 @@
+PREHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-4 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-4
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        s 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        s 
+          TableScan
+            alias: s
+            HashTable Sink Operator
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              Position of Big Table: 1
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        v 
+          TableScan
+            alias: v
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              outputColumnNames: _col0, _col7
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col7
+                      type: string
+                outputColumnNames: _col0, _col7
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT _col7)
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: string
+                        expr: _col7
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col2
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+PREHOOK: query: -- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s 
+          TableScan
+            alias: s
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              outputColumnNames: _col0, _col7
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col7
+                      type: string
+                outputColumnNames: _col0, _col7
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT _col7)
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: string
+                        expr: _col7
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s 
+          TableScan
+            alias: s
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              outputColumnNames: _col0, _col7
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col7
+                      type: string
+                outputColumnNames: _col0, _col7
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT _col7)
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: string
+                        expr: _col7
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+PREHOOK: query: -- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k_part
+PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k_part
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_part
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_part
+POSTHOOK: Output: default@studenttab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_part@p=foo
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_part
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_part
+POSTHOOK: Output: default@votertab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_part@p=foo
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_part@p=foo
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_part) s) (TOK_TABREF (TOK_TABNAME votertab10k_part) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL s) p) 'bar') (= (. (TOK_TABLE_OR_COL v) p) 'bar'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s 
+          TableScan
+            alias: s
+            Filter Operator
+              predicate:
+                  expr: (p = 'bar')
+                  type: boolean
+              Sorted Merge Bucket Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {name}
+                  1 {registration}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[name]]
+                  1 [Column[name]]
+                outputColumnNames: _col0, _col8
+                Position of Big Table: 0
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col8
+                        type: string
+                  outputColumnNames: _col0, _col8
+                  Group By Operator
+                    aggregations:
+                          expr: count(DISTINCT _col8)
+                    bucketGroup: false
+                    keys:
+                          expr: _col0
+                          type: string
+                          expr: _col8
+                          type: string
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                      sort order: ++
+                      Map-reduce partition columns:
+                            expr: _col0
+                            type: string
+                      tag: -1
+                      value expressions:
+                            expr: _col2
+                            type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_part
+PREHOOK: Input: default@votertab10k_part
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_part
+POSTHOOK: Input: default@votertab10k_part
+#### A masked pattern was here ####
+PREHOOK: query: drop table studenttab10k
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Output: default@studenttab10k
+POSTHOOK: query: drop table studenttab10k
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: drop table votertab10k
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k
+PREHOOK: Output: default@votertab10k
+POSTHOOK: query: drop table votertab10k
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: drop table studenttab10k_smb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: drop table studenttab10k_smb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: drop table votertab10k_smb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k_smb
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: drop table votertab10k_smb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k_smb
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: drop table studenttab10k_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k_part
+PREHOOK: Output: default@studenttab10k_part
+POSTHOOK: query: drop table studenttab10k_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k_part
+POSTHOOK: Output: default@studenttab10k_part
+PREHOOK: query: drop table votertab10k_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k_part
+PREHOOK: Output: default@votertab10k_part
+POSTHOOK: query: drop table votertab10k_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k_part
+POSTHOOK: Output: default@votertab10k_part



Mime
View raw message