hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1471694 - in /hive/branches/branch-0.11/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ test/queries/clientpositive/ test/results/clientpositive/
Date Wed, 24 Apr 2013 21:32:19 GMT
Author: hashutosh
Date: Wed Apr 24 21:32:18 2013
New Revision: 1471694

URL: http://svn.apache.org/r1471694
Log:
HIVE-4371 : some issue with merging join trees (Navis via Ashutosh Chauhan)

Added:
    hive/branches/branch-0.11/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
    hive/branches/branch-0.11/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
Modified:
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SizeBasedBigTableSelectorForAutoSMJ.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java

Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java?rev=1471694&r1=1471693&r2=1471694&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
(original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
Wed Apr 24 21:32:18 2013
@@ -472,6 +472,10 @@ abstract public class AbstractSMBJoinPro
       (BigTableSelectorForAutoSMJ) ReflectionUtils.newInstance(bigTableMatcherClass, null);
     int bigTablePosition =
       bigTableMatcher.getBigTablePosition(pGraphContext, joinOp);
+    if (bigTablePosition < 0) {
+      // contains aliases from sub-query
+      return false;
+    }
     context.setBigTablePosition(bigTablePosition);
     String joinAlias =
       bigTablePosition == 0 ?

Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java?rev=1471694&r1=1471693&r2=1471694&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
(original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
Wed Apr 24 21:32:18 2013
@@ -57,6 +57,9 @@ public class AvgPartitionSizeBasedBigTab
       getListTopOps(joinOp, topOps);
       int currentPos = 0;
       for (TableScanOperator topOp : topOps) {
+        if (topOp == null) {
+          return -1;
+        }
         int numPartitions = 1; // in case the sizes match, preference is
                                // given to the table with fewer partitions
         Table table = parseCtx.getTopToTable().get(topOp);

Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SizeBasedBigTableSelectorForAutoSMJ.java?rev=1471694&r1=1471693&r2=1471694&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SizeBasedBigTableSelectorForAutoSMJ.java
(original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SizeBasedBigTableSelectorForAutoSMJ.java
Wed Apr 24 21:32:18 2013
@@ -23,6 +23,7 @@ import java.util.List;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -43,9 +44,10 @@ public abstract class SizeBasedBigTableS
 
     for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
       if (parentOp instanceof TableScanOperator) {
-        topOps.add((TableScanOperator)parentOp);
-      }
-      else {
+        topOps.add((TableScanOperator) parentOp);
+      } else if (parentOp instanceof CommonJoinOperator) {
+        topOps.add(null);
+      } else {
         getListTopOps(parentOp, topOps);
       }
     }

Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java?rev=1471694&r1=1471693&r2=1471694&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
(original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
Wed Apr 24 21:32:18 2013
@@ -49,6 +49,9 @@ implements BigTableSelectorForAutoSMJ {
       getListTopOps(joinOp, topOps);
       int currentPos = 0;
       for (TableScanOperator topOp : topOps) {
+        if (topOp == null) {
+          return -1;
+        }
         Table table = parseCtx.getTopToTable().get(topOp);
         long currentSize = 0;
 

Added: hive/branches/branch-0.11/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q?rev=1471694&view=auto
==============================================================================
--- hive/branches/branch-0.11/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
(added)
+++ hive/branches/branch-0.11/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
Wed Apr 24 21:32:18 2013
@@ -0,0 +1,31 @@
+-- small 1 part, 2 bucket & big 2 part, 4 bucket
+
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small
partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small
partition(ds='2008-04-08');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY
(key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_medium
partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_medium
partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_medium
partition(ds='2008-04-08');
+
+explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key
JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big
c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key;

Added: hive/branches/branch-0.11/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out?rev=1471694&view=auto
==============================================================================
--- hive/branches/branch-0.11/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
(added)
+++ hive/branches/branch-0.11/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
Wed Apr 24 21:32:18 2013
@@ -0,0 +1,526 @@
+PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket
+
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket
+
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO
TABLE bucket_small partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO
TABLE bucket_small partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_small
+POSTHOOK: Output: default@bucket_small@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO
TABLE bucket_small partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_small@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO
TABLE bucket_small partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_small@ds=2008-04-08
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-09
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-09
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_big@ds=2008-04-09
+POSTHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE
bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: CREATE TABLE bucket_medium (key string, value string) partitioned by (ds
string)
+CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE bucket_medium (key string, value string) partitioned by (ds
string)
+CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket_medium
+PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_medium
+POSTHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_medium
+POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_medium@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@bucket_medium@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO
TABLE bucket_medium partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
+PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b
ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium
b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small)
a) (TOK_TABREF (TOK_TABNAME bucket_medium) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL
b) key))) (TOK_TABREF (TOK_TABNAME bucket_big) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL
b) key))) (TOK_TABREF (TOK_TABNAME bucket_medium) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL
b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR
count)))))
+
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-7 depends on stages: Stage-8
+  Stage-2 depends on stages: Stage-7
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        b 
+          Fetch Operator
+            limit: -1
+        d 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 {key}
+                2 {key}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+                2 [Column[key]]
+              Position of Big Table: 2
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 {key}
+                2 {key}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+                2 [Column[key]]
+              Position of Big Table: 2
+        d 
+          TableScan
+            alias: d
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 []
+                1 []
+              Position of Big Table: 0
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+        c 
+          TableScan
+            alias: c
+            GatherStats: false
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Inner Join 1 to 2
+              condition expressions:
+                0 
+                1 {key}
+                2 {key}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+                2 [Column[key]]
+              outputColumnNames: _col5, _col10
+              Position of Big Table: 2
+              Filter Operator
+                isSamplingPred: false
+                predicate:
+                    expr: (_col10 = _col5)
+                    type: boolean
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 
+                      1 
+                    handleSkewJoin: false
+                    keys:
+                      0 []
+                      1 []
+                    Position of Big Table: 0
+                    Select Operator
+                      Group By Operator
+                        aggregations:
+                              expr: count()
+                        bucketGroup: false
+                        mode: hash
+                        outputColumnNames: _col0
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+#### A masked pattern was here ####
+                          NumFilesPerFileSink: 1
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              properties:
+                                columns _col0
+                                columns.types bigint
+                                escape.delim \
+                          TotalFiles: 1
+                          GatherStats: false
+                          MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+      Needs Tagging: true
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                numFiles 8
+                numPartitions 2
+                numRows 0
+                partition_columns ds
+                rawDataSize 0
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 11624
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                numFiles 8
+                numPartitions 2
+                numRows 0
+                partition_columns ds
+                rawDataSize 0
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 11624
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 3
+              bucket_field_name key
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_medium
+              numFiles 3
+              numRows 0
+              partition_columns ds
+              rawDataSize 0
+              serialization.ddl struct bucket_medium { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 170
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 3
+                bucket_field_name key
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_medium
+                numFiles 3
+                numPartitions 1
+                numRows 0
+                partition_columns ds
+                rawDataSize 0
+                serialization.ddl struct bucket_medium { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 170
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_medium
+            name: default.bucket_medium
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 2
+              bucket_field_name key
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_small
+              numFiles 2
+              numRows 0
+              partition_columns ds
+              rawDataSize 0
+              serialization.ddl struct bucket_small { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 114
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 2
+                bucket_field_name key
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_small
+                numFiles 2
+                numPartitions 1
+                numRows 0
+                partition_columns ds
+                rawDataSize 0
+                serialization.ddl struct bucket_small { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 114
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_small
+            name: default.bucket_small
+      Truncated Path -> Alias:
+        /bucket_big/ds=2008-04-08 [c]
+        /bucket_big/ds=2008-04-09 [c]
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order: 
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0
+              columns.types bigint
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0
+                columns.types bigint
+                escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0
+                    columns.types bigint
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key
JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@ds=2008-04-08
+PREHOOK: Input: default@bucket_big@ds=2008-04-09
+PREHOOK: Input: default@bucket_medium
+PREHOOK: Input: default@bucket_medium@ds=2008-04-08
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key
JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@ds=2008-04-08
+POSTHOOK: Input: default@bucket_big@ds=2008-04-09
+POSTHOOK: Input: default@bucket_medium
+POSTHOOK: Input: default@bucket_medium@ds=2008-04-08
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@ds=2008-04-08
+#### A masked pattern was here ####
+570



Mime
View raw message