hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1643058 - in /hive/branches/spark: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/...
Date Mon, 15 Dec 2014 17:12:14 GMT
Author: xuefu
Date: Wed Dec  3 05:48:21 2014
New Revision: 1643058

URL: http://svn.apache.org/r1643058
Log:
HIVE-8943: Fix memory limit check for combine nested mapjoins [Spark Branch] (Szehon via Xuefu)

Added:
    hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q
    hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q
    hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out
Modified:
    hive/branches/spark/itests/src/test/resources/testconfiguration.properties
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Wed Dec  3 05:48:21 2014
@@ -491,6 +491,8 @@ spark.query.files=add_part_multiple.q, \
   auto_join_filters.q, \
   auto_join_nulls.q, \
   auto_join_reordering_values.q, \
+  auto_join_stats.q, \
+  auto_join_stats2.q, \
   auto_join_without_localtask.q, \
   auto_smb_mapjoin_14.q, \
   auto_sortmerge_join_1.q, \

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java Wed Dec  3 05:48:21 2014
@@ -24,6 +24,7 @@ import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -90,7 +91,6 @@ public class SparkMapJoinOptimizer imple
     int numBuckets = 1;
 
     LOG.info("Estimated number of buckets " + numBuckets);
-    int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
 
     /* TODO: handle this later
     if (mapJoinConversionPos < 0) {
@@ -153,8 +153,8 @@ public class SparkMapJoinOptimizer imple
 
     LOG.info("Convert to non-bucketed map join");
     // check if we can convert to map join no bucket scaling.
-    mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
-
+    ObjectPair<Integer, Long> mapJoinInfo = getMapJoinConversionInfo(joinOp, context, 1);
+    int mapJoinConversionPos = mapJoinInfo.getFirst();
 
     if (mapJoinConversionPos < 0) {
       // we are just converting to a common merge join operator. The shuffle
@@ -175,6 +175,8 @@ public class SparkMapJoinOptimizer imple
       setAllChildrenTraitsToNull(childOp);
     }
 
+    context.getMjOpSizes().put(mapJoinOp, mapJoinInfo.getSecond());
+
     return null;
   }
 
@@ -311,10 +313,10 @@ public class SparkMapJoinOptimizer imple
    * @param joinOp
    * @param context
    * @param buckets
-   * @return
+   * @return pair, first value is the position, second value is the in-memory size of this mapjoin.
    */
-  private int getMapJoinConversionPos(JoinOperator joinOp, OptimizeSparkProcContext context,
-      int buckets) {
+  private ObjectPair<Integer, Long> getMapJoinConversionInfo(JoinOperator joinOp, OptimizeSparkProcContext context,
+                                                                int buckets) {
     Set<Integer> bigTableCandidateSet =
         MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
 
@@ -336,7 +338,7 @@ public class SparkMapJoinOptimizer imple
       Statistics currInputStat = parentOp.getStatistics();
       if (currInputStat == null) {
         LOG.warn("Couldn't get statistics from: "+parentOp);
-        return -1;
+        return new ObjectPair(-1, 0);
       }
 
       // Union is hard to handle. For instance, the following case:
@@ -359,7 +361,7 @@ public class SparkMapJoinOptimizer imple
       // But, this is tricky to implement, and we'll leave it as a future work for now.
       // TODO: handle this as a MJ case
       if (containUnionWithoutRS(parentOp.getParentOperators().get(0))) {
-        return -1;
+        return new ObjectPair(-1, 0);
       }
 
       long inputSize = currInputStat.getDataSize();
@@ -370,14 +372,14 @@ public class SparkMapJoinOptimizer imple
         if (bigTableFound) {
           // cannot convert to map join; we've already chosen a big table
           // on size and there's another one that's bigger.
-          return -1;
+          return new ObjectPair(-1, 0);
         }
 
         if (inputSize/buckets > maxSize) {
           if (!bigTableCandidateSet.contains(pos)) {
             // can't use the current table as the big table, but it's too
             // big for the map side.
-            return -1;
+            return new ObjectPair(-1, 0);
           }
 
           bigTableFound = true;
@@ -392,7 +394,7 @@ public class SparkMapJoinOptimizer imple
         if (totalSize/buckets > maxSize) {
           // sum of small tables size in this join exceeds configured limit
           // hence cannot convert.
-          return -1;
+          return new ObjectPair(-1, 0);
         }
 
         if (bigTableCandidateSet.contains(pos)) {
@@ -403,13 +405,93 @@ public class SparkMapJoinOptimizer imple
         totalSize += currInputStat.getDataSize();
         if (totalSize/buckets > maxSize) {
           // cannot hold all map tables in memory. Cannot convert.
-          return -1;
+          return new ObjectPair(-1, 0);
         }
       }
       pos++;
     }
 
-    return bigTablePosition;
+    if (bigTablePosition == -1) {
+      //No big table candidates.
+      return new ObjectPair(-1, 0);
+    }
+
+    //Final check, find size of already-calculated Mapjoin Operators in same work (spark-stage).  We need to factor
+    //this in to prevent overwhelming Spark executor-memory.
+    long connectedMapJoinSize = getConnectedMapJoinSize(joinOp.getParentOperators().get(bigTablePosition), joinOp, context);
+    if ((connectedMapJoinSize + (totalSize / buckets)) > maxSize) {
+      return new ObjectPair(-1, 0);
+    }
+
+    return new ObjectPair(bigTablePosition, connectedMapJoinSize + (totalSize / buckets));
+  }
+
+  /**
+   * Examines this operator and all the connected operators, for mapjoins that will be in the same work.
+   * @param parentOp potential big-table parent operator, explore up from this.
+   * @param joinOp potential mapjoin operator, explore down from this.
+   * @param ctx context to pass information.
+   * @return total size of parent and child mapjoins in same work as this operator.
+   */
+  private long getConnectedMapJoinSize(Operator<? extends OperatorDesc> parentOp, Operator joinOp, OptimizeSparkProcContext ctx) {
+    long result = 0;
+    for (Operator<? extends OperatorDesc> grandParentOp : parentOp.getParentOperators()) {
+      result += getConnectedParentMapJoinSize(grandParentOp, ctx);
+    }
+    result += getConnectedChildMapJoinSize(joinOp, ctx);
+    return result;
+  }
+
+  /**
+   * Examines this operator and all the parents, for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of parent mapjoins in same work as this operator.
+   */
+  private long getConnectedParentMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+      //Work Boundary, stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      //found parent mapjoin operator.  Its size should already reflect any other mapjoins connected to it.
+      long mjSize = ctx.getMjOpSizes().get(op);
+      return mjSize;
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+      //Else, recurse up the parents.
+      result += getConnectedParentMapJoinSize(parentOp, ctx);
+    }
+    return result;
+  }
+
+  /**
+   * Examines this operator and all the children, for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of child mapjoins in same work as this operator.
+   */
+  private long getConnectedChildMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+      //Work Boundary, stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      //found child mapjoin operator.  Its size should already reflect any mapjoins connected to it, so stop processing.
+      long mjSize = ctx.getMjOpSizes().get(op);
+      return mjSize;
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+      //Else, recurse to the children.
+      result += getConnectedChildMapJoinSize(childOp, ctx);
+    }
+    return result;
   }
 
   /*

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java Wed Dec  3 05:48:21 2014
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.parse.spark;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -28,7 +29,9 @@ import org.apache.hadoop.hive.ql.parse.P
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 import java.util.Deque;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 
 /**
@@ -44,6 +47,7 @@ public class OptimizeSparkProcContext im
   private final Set<ReadEntity> inputs;
   private final Set<WriteEntity> outputs;
   private final Set<ReduceSinkOperator> visitedReduceSinks = new HashSet<ReduceSinkOperator>();
+  private final Map<MapJoinOperator, Long> mjOpSizes = new HashMap<MapJoinOperator, Long>();
 
   // rootOperators are all the table scan operators in sequence
   // of traversal
@@ -83,4 +87,8 @@ public class OptimizeSparkProcContext im
   public Deque<Operator<? extends OperatorDesc>> getRootOperators() {
     return rootOperators;
   }
+
+  public Map<MapJoinOperator, Long> getMjOpSizes() {
+    return mjOpSizes;
+  }
 }

Added: hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q (added)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q Wed Dec  3 05:48:21 2014
@@ -0,0 +1,19 @@
+set hive.auto.convert.join = true;
+set hive.auto.convert.join.noconditionaltask.size=2660;
+
+-- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+
+create table smalltable2(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable2;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
\ No newline at end of file

Added: hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q (added)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q Wed Dec  3 05:48:21 2014
@@ -0,0 +1,17 @@
+set hive.auto.convert.join = true;
+
+-- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+
+create table smalltable2(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable2;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
\ No newline at end of file

Added: hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out Wed Dec  3 05:48:21 2014
@@ -0,0 +1,545 @@
+PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage , consists of Stage-10, Stage-11, Stage-1
+  Stage-10 has a backup stage: Stage-1
+  Stage-6 depends on stages: Stage-10
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-7
+  Stage-5 depends on stages: Stage-9
+  Stage-11 has a backup stage: Stage-1
+  Stage-7 depends on stages: Stage-11
+  Stage-1
+  Stage-0 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        src2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        src2 
+          TableScan
+            alias: src2
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              HashTable Sink Operator
+                condition expressions:
+                  0 {key}
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        smalltable 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        smalltable 
+          TableScan
+            alias: smalltable
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5}
+                  1 {key}
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col0} {_col5}
+                1 {key}
+              keys:
+                0 (_col0 + _col5) (type: double)
+                1 UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col5, _col10
+              Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-11
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        src1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        src1 
+          TableScan
+            alias: src1
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src2
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0}
+            1 {KEY.reducesinkkey0}
+          outputColumnNames: _col0, _col5
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (_col0 + _col5) is not null (type: boolean)
+            Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1
+  Stage-13 has a backup stage: Stage-1
+  Stage-9 depends on stages: Stage-13
+  Stage-12 depends on stages: Stage-1, Stage-9, Stage-10
+  Stage-7 depends on stages: Stage-12
+  Stage-14 has a backup stage: Stage-1
+  Stage-10 depends on stages: Stage-14
+  Stage-1
+  Stage-0 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-11
+    Conditional Operator
+
+  Stage: Stage-13
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        src2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        src2 
+          TableScan
+            alias: src2
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              HashTable Sink Operator
+                condition expressions:
+                  0 {key}
+                  1 
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        smalltable 
+          Fetch Operator
+            limit: -1
+        smalltable2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        smalltable 
+          TableScan
+            alias: smalltable
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5}
+                  1 {key}
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+        smalltable2 
+          TableScan
+            alias: smalltable2
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5} {_col10}
+                  1 
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col0} {_col5}
+                1 {key}
+              keys:
+                0 (_col0 + _col5) (type: double)
+                1 UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col5, _col10
+              Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: (_col0 + _col5) is not null (type: boolean)
+                Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {_col0} {_col5} {_col10}
+                    1 
+                  keys:
+                    0 (_col0 + _col5) (type: double)
+                    1 UDFToDouble(key) (type: double)
+                  outputColumnNames: _col0, _col5, _col10
+                  Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-14
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        src1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        src1 
+          TableScan
+            alias: src1
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src2
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0}
+            1 {KEY.reducesinkkey0}
+          outputColumnNames: _col0, _col5
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (_col0 + _col5) is not null (type: boolean)
+            Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+4	4	8
+4	4	8

Added: hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out Wed Dec  3 05:48:21 2014
@@ -0,0 +1,311 @@
+PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-7 is a root stage
+  Stage-5 depends on stages: Stage-7
+  Stage-0 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        smalltable 
+          Fetch Operator
+            limit: -1
+        src1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        smalltable 
+          TableScan
+            alias: smalltable
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5}
+                  1 {key}
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+        src1 
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col5}
+                      1 {key}
+                    keys:
+                      0 (_col0 + _col5) (type: double)
+                      1 UDFToDouble(key) (type: double)
+                    outputColumnNames: _col0, _col5, _col10
+                    Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-10 is a root stage
+  Stage-7 depends on stages: Stage-10
+  Stage-0 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        smalltable 
+          Fetch Operator
+            limit: -1
+        smalltable2 
+          Fetch Operator
+            limit: -1
+        src1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        smalltable 
+          TableScan
+            alias: smalltable
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5}
+                  1 {key}
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+        smalltable2 
+          TableScan
+            alias: smalltable2
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 {_col0} {_col5} {_col10}
+                  1 
+                keys:
+                  0 (_col0 + _col5) (type: double)
+                  1 UDFToDouble(key) (type: double)
+        src1 
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key}
+                  1 {key}
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col5}
+                      1 {key}
+                    keys:
+                      0 (_col0 + _col5) (type: double)
+                      1 UDFToDouble(key) (type: double)
+                    outputColumnNames: _col0, _col5, _col10
+                    Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                    Filter Operator
+                      predicate: (_col0 + _col5) is not null (type: boolean)
+                      Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 {_col0} {_col5} {_col10}
+                          1 
+                        keys:
+                          0 (_col0 + _col5) (type: double)
+                          1 UDFToDouble(key) (type: double)
+                        outputColumnNames: _col0, _col5, _col10
+                        Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+4	4	8
+4	4	8

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out Wed Dec  3 05:48:21 2014
@@ -0,0 +1,347 @@
+PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: (_col0 + _col5) (type: double)
+                          sort order: +
+                          Map-reduce partition columns: (_col0 + _col5) (type: double)
+                          Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: string), _col5 (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: UDFToDouble(key) (type: double)
+                      sort order: +
+                      Map-reduce partition columns: UDFToDouble(key) (type: double)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: key (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col5}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col5, _col10
+                Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable2
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5} {_col10}
+                        1 
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: (_col0 + _col5) (type: double)
+                          sort order: +
+                          Map-reduce partition columns: (_col0 + _col5) (type: double)
+                          Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: string), _col5 (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: UDFToDouble(key) (type: double)
+                      sort order: +
+                      Map-reduce partition columns: UDFToDouble(key) (type: double)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: key (type: string)
+        Reducer 2 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col5}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col5, _col10
+                Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col0 + _col5) is not null (type: boolean)
+                  Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col5} {_col10}
+                      1 
+                    keys:
+                      0 (_col0 + _col5) (type: double)
+                      1 UDFToDouble(key) (type: double)
+                    outputColumnNames: _col0, _col5, _col10
+                    input vertices:
+                      1 Map 5
+                    Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+4	4	8
+4	4	8

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out Wed Dec  3 05:48:21 2014
@@ -0,0 +1,327 @@
+PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5}
+                        1 {key}
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          condition expressions:
+                            0 {_col0} {_col5}
+                            1 {key}
+                          keys:
+                            0 (_col0 + _col5) (type: double)
+                            1 UDFToDouble(key) (type: double)
+                          outputColumnNames: _col0, _col5, _col10
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5}
+                        1 {key}
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable2
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5} {_col10}
+                        1 
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          condition expressions:
+                            0 {_col0} {_col5}
+                            1 {key}
+                          keys:
+                            0 (_col0 + _col5) (type: double)
+                            1 UDFToDouble(key) (type: double)
+                          outputColumnNames: _col0, _col5, _col10
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                          Filter Operator
+                            predicate: (_col0 + _col5) is not null (type: boolean)
+                            Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              condition expressions:
+                                0 {_col0} {_col5} {_col10}
+                                1 
+                              keys:
+                                0 (_col0 + _col5) (type: double)
+                                1 UDFToDouble(key) (type: double)
+                              outputColumnNames: _col0, _col5, _col10
+                              input vertices:
+                                1 Map 4
+                              Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                              Select Operator
+                                expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                                outputColumnNames: _col0, _col1, _col2
+                                Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                                File Output Operator
+                                  compressed: false
+                                  Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                                  table:
+                                      input format: org.apache.hadoop.mapred.TextInputFormat
+                                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+4	4	8
+4	4	8



Mime
View raw message