hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1635630 [1/3] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/
Date Thu, 30 Oct 2014 22:23:41 GMT
Author: xuefu
Date: Thu Oct 30 22:23:40 2014
New Revision: 1635630

URL: http://svn.apache.org/r1635630
Log:
HIVE-8610: Compile time skew join optimization doesn't work with auto map join (Rui via Xuefu)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q
    hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin8.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Oct 30 22:23:40 2014
@@ -1093,8 +1093,9 @@ public abstract class Operator<T extends
 
     @SuppressWarnings("unchecked")
     T descClone = (T)conf.clone();
+    // also clone the colExprMap by default
     Operator<? extends OperatorDesc> ret =
-        OperatorFactory.getAndMakeChild(descClone, getSchema(), parentClones);
+        OperatorFactory.getAndMakeChild(descClone, getSchema(), getColumnExprMap(), parentClones);
 
     return ret;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java Thu Oct 30 22:23:40 2014
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.lib.Rul
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -164,12 +165,23 @@ public class SkewJoinOptimizer implement
         return null;
       }
 
+      // have to create a QBJoinTree for the cloned join operator
+      QBJoinTree originJoinTree = parseContext.getJoinContext().get(joinOp);
+      QBJoinTree newJoinTree;
+      try {
+        newJoinTree = originJoinTree.clone();
+      } catch (CloneNotSupportedException e) {
+        LOG.debug("QBJoinTree could not be cloned: ", e);
+        return null;
+      }
+
       JoinOperator joinOpClone;
       if (processSelect) {
         joinOpClone = (JoinOperator)(currOpClone.getParentOperators().get(0));
       } else {
         joinOpClone = (JoinOperator)currOpClone;
       }
+      parseContext.getJoinContext().put(joinOpClone, newJoinTree);
 
       List<TableScanOperator> tableScanCloneOpsForJoin =
           new ArrayList<TableScanOperator>();
@@ -201,6 +213,7 @@ public class SkewJoinOptimizer implement
         }
 
         parseContext.getTopOps().put(newAlias, tso);
+        setUpAlias(originJoinTree, newJoinTree, tabAlias, newAlias, tso);
       }
 
       // Now do a union of the select operators: selectOp and selectOpClone
@@ -610,6 +623,48 @@ public class SkewJoinOptimizer implement
         }
       }
     }
+
+    /**
+     * Set alias in the cloned join tree
+     */
+    private static void setUpAlias(QBJoinTree origin, QBJoinTree cloned, String origAlias,
+        String newAlias, Operator<? extends OperatorDesc> topOp) {
+      cloned.getAliasToOpInfo().remove(origAlias);
+      cloned.getAliasToOpInfo().put(newAlias, topOp);
+      if (origin.getLeftAlias().equals(origAlias)) {
+        cloned.setLeftAlias(null);
+        cloned.setLeftAlias(newAlias);
+      }
+      replaceAlias(origin.getLeftAliases(), cloned.getLeftAliases(), origAlias, newAlias);
+      replaceAlias(origin.getRightAliases(), cloned.getRightAliases(), origAlias, newAlias);
+      replaceAlias(origin.getBaseSrc(), cloned.getBaseSrc(), origAlias, newAlias);
+      replaceAlias(origin.getMapAliases(), cloned.getMapAliases(), origAlias, newAlias);
+      replaceAlias(origin.getStreamAliases(), cloned.getStreamAliases(), origAlias, newAlias);
+    }
+
+    private static void replaceAlias(String[] origin, String[] cloned,
+        String alias, String newAlias) {
+      if (origin == null || cloned == null || origin.length != cloned.length) {
+        return;
+      }
+      for (int i = 0; i < origin.length; i++) {
+        if (origin[i].equals(alias)) {
+          cloned[i] = newAlias;
+        }
+      }
+    }
+
+    private static void replaceAlias(List<String> origin, List<String> cloned,
+        String alias, String newAlias) {
+      if (origin == null || cloned == null || origin.size() != cloned.size()) {
+        return;
+      }
+      for (int i = 0; i < origin.size(); i++) {
+        if (origin.get(i).equals(alias)) {
+          cloned.set(i, newAlias);
+        }
+      }
+    }
   }
 
   /* (non-Javadoc)

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java Thu Oct 30 22:23:40 2014
@@ -79,4 +79,7 @@ public class JoinCond {
     this.joinType = joinType;
   }
 
+  public void setPreserved(boolean preserved) {
+    this.preserved = preserved;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Thu Oct 30 22:23:40 2014
@@ -32,7 +32,7 @@ import org.apache.hadoop.hive.ql.plan.Op
  * Internal representation of the join tree.
  *
  */
-public class QBJoinTree implements Serializable{
+public class QBJoinTree implements Serializable, Cloneable {
   private static final long serialVersionUID = 1L;
   private String leftAlias;
   private String[] rightAliases;
@@ -363,4 +363,70 @@ public class QBJoinTree implements Seria
   public List<ASTNode> getPostJoinFilters() {
     return postJoinFilters;
   }
+
+  @Override
+  public QBJoinTree clone() throws CloneNotSupportedException {
+    QBJoinTree cloned = new QBJoinTree();
+
+    // shallow copy aliasToOpInfo, we won't want to clone the operator tree here
+    cloned.setAliasToOpInfo(aliasToOpInfo == null ? null :
+        new HashMap<String, Operator<? extends OperatorDesc>>(aliasToOpInfo));
+
+    cloned.setBaseSrc(baseSrc == null ? null : baseSrc.clone());
+
+    // shallow copy ASTNode
+    cloned.setExpressions(expressions);
+    cloned.setFilters(filters);
+    cloned.setFiltersForPushing(filtersForPushing);
+
+    // clone filterMap
+    int[][] clonedFilterMap = filterMap == null ? null : new int[filterMap.length][];
+    if (filterMap != null) {
+      for (int i = 0; i < filterMap.length; i++) {
+        clonedFilterMap[i] = filterMap[i] == null ? null : filterMap[i].clone();
+      }
+    }
+    cloned.setFilterMap(clonedFilterMap);
+
+    cloned.setId(id);
+
+    // clone joinCond
+    JoinCond[] clonedJoinCond = joinCond == null ? null : new JoinCond[joinCond.length];
+    if (joinCond != null) {
+      for (int i = 0; i < joinCond.length; i++) {
+        if(joinCond[i] == null) {
+          continue;
+        }
+        JoinCond clonedCond = new JoinCond();
+        clonedCond.setJoinType(joinCond[i].getJoinType());
+        clonedCond.setLeft(joinCond[i].getLeft());
+        clonedCond.setPreserved(joinCond[i].getPreserved());
+        clonedCond.setRight(joinCond[i].getRight());
+        clonedJoinCond[i] = clonedCond;
+      }
+    }
+    cloned.setJoinCond(clonedJoinCond);
+
+    cloned.setJoinSrc(joinSrc == null ? null : joinSrc.clone());
+    cloned.setLeftAlias(leftAlias);
+    cloned.setLeftAliases(leftAliases == null ? null : leftAliases.clone());
+    cloned.setMapAliases(mapAliases == null ? null : new ArrayList<String>(mapAliases));
+    cloned.setMapSideJoin(mapSideJoin);
+    cloned.setNoOuterJoin(noOuterJoin);
+    cloned.setNoSemiJoin(noSemiJoin);
+    cloned.setNullSafes(nullsafes == null ? null : new ArrayList<Boolean>(nullsafes));
+    cloned.setRightAliases(rightAliases == null ? null : rightAliases.clone());
+    cloned.setStreamAliases(streamAliases == null ? null : new ArrayList<String>(streamAliases));
+
+    // clone postJoinFilters
+    for (ASTNode filter : postJoinFilters) {
+      cloned.getPostJoinFilters().add(filter);
+    }
+    // clone rhsSemijoin
+    for (Entry<String, ArrayList<ASTNode>> entry : rhsSemijoin.entrySet()) {
+      cloned.addRHSSemijoinColumns(entry.getKey(), entry.getValue());
+    }
+
+    return cloned;
+  }
 }

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,44 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,53 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1;
+
+-- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2));
+INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1;
+
+CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2;
+
+CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3));
+
+INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2;
+
+-- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,26 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt19
+-- test compile time skew join and auto map join
+-- add a test where the skewed key is also the bucketized key
+-- it should not matter, and the compile time skewed join
+-- optimization is performed
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,34 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt3
+-- test compile time skew join and auto map join
+-- a simple query with skew on both the tables. One of the skewed
+-- value is common to both the tables. The skewed value should not be
+-- repeated in the filter.
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,26 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt6
+-- test compile time skew join and auto map join
+-- Both the join tables are skewed by 2 keys, and one of the skewed values
+-- is common to both the tables. The join key is a subset of the skewed key set:
+-- it only contains the first skewed key for both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,30 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt7
+-- test compile time skew join and auto map join
+-- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 2 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key;
+
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,51 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt9
+-- test compile time skew join and auto map join
+-- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a union all
+-- adding a order by at the end to make the results deterministic
+EXPLAIN
+select * from
+(
+select key, val from T1
+  union all 
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key;
+
+select * from
+(
+select key, val from T1
+  union all 
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.val, b.val;
+
+-- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a group by
+EXPLAIN
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key;
+
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.cnt, b.val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,22 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+drop table array_valued_T1;
+create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8));
+insert overwrite table array_valued_T1 select key, array(value) from T1;
+
+-- copy from skewjoinopt10
+-- test compile time skew join and auto map join
+-- This test is to verify the skew join compile optimization when the join is followed by a lateral view
+-- adding a order by at the end to make the results deterministic
+
+explain 
+select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val;
+
+select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val
+ORDER BY key, val;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,35 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+    
+CREATE TABLE T1(key STRING, val STRING)	
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;	
+       
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;	
+     
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;	
+       
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;	
+     
+-- copy from skewjoinopt11
+-- test compile time skew join and auto map join	 
+-- This test is to verify the skew join compile optimization when the join is followed
+-- by a union. Both sides of a union consist of a join, which should have used
+-- skew join compile time optimization.
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN	 
+select * from	
+(      
+  select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+    union all 	
+  select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+) subq1; 
+  
+select * from	
+(      
+  select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+    union all 	
+  select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+) subq1
+ORDER BY key, val1, val2;

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,38 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING)
+SKEWED BY (val) ON ((12)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt13
+-- test compile time skew join and auto map join
+-- This test is for skewed join compile time optimization for more than 2 tables.
+-- The join key for table 3 is different from the join key used for joining
+-- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table
+-- 3 consist of a sub-query which contains a join, the compile time skew join 
+-- optimization is not performed
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+select *
+from 
+T1 a join T2 b on a.key = b.key 
+join T3 c on a.val = c.val;
+
+select *
+from 
+T1 a join T2 b on a.key = b.key 
+join T3 c on a.val = c.val
+order by a.key, b.key, c.key, a.val, b.val, c.val;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,40 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING)
+SKEWED BY (val) ON ((12)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt14
+-- test compile time skew join and auto map join
+-- This test is for skewed join compile time optimization for more than 2 tables.
+-- The join key for table 3 is different from the join key used for joining
+-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table
+-- 3 consist of a sub-query which contains a join, the compile time skew join 
+-- optimization is not enabled for table 3, but it is used for the first join between
+-- tables 1 and 2
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+select *
+from 
+T1 a join T2 b on a.key = b.key 
+join T3 c on a.val = c.val;
+
+select *
+from 
+T1 a join T2 b on a.key = b.key 
+join T3 c on a.val = c.val
+order by a.key, b.key, a.val, b.val;
+

Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out Thu Oct 30 22:23:40 2014
@@ -0,0 +1,660 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((key = '2') or (key = '3')) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((key = '2') or (key = '3')) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL	NULL	4	14
+NULL	NULL	5	15
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((key = '2') or (key = '3')) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+          TableScan
+            alias: b
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((key = '2') or (key = '3')) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+8

Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out Thu Oct 30 22:23:40 2014
@@ -0,0 +1,700 @@
+PREHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmpT1
+POSTHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmpT1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tmpt1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tmpt1
+PREHOOK: query: -- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: -- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmpt1
+PREHOOK: Output: default@t1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmpt1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmpT2
+POSTHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmpT2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tmpt2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tmpt2
+PREHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmpt2
+PREHOOK: Output: default@t2
+POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmpt2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 2) or (key = 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 2) or (key = 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 {key} {val}
+                  1 {key} {val}
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL	NULL	4	14
+NULL	NULL	5	15
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-2 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+        subquery1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 2) or (key = 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+        subquery1:a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+          TableScan
+            alias: b
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 2) or (key = 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                  Union
+                    Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      SELECT * : (no compute)
+                      Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+8



Mime
View raw message