hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From li...@apache.org
Subject [8/8] hive git commit: HIVE-6348: Order by/Sort by in subquery (Rui Li reviewed by Vineet Garg)
Date Fri, 30 Jun 2017 08:07:43 GMT
HIVE-6348: Order by/Sort by in subquery (Rui Li reviewed by Vineet Garg)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b11e43b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b11e43b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b11e43b1

Branch: refs/heads/master
Commit: b11e43b155362c0ebd7a0dc905bf097d0c3eff4b
Parents: 3fa4834
Author: Rui Li <lirui@apache.org>
Authored: Fri Jun 30 16:07:30 2017 +0800
Committer: Rui Li <lirui@apache.org>
Committed: Fri Jun 30 16:07:30 2017 +0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    2 +
 .../clientpositive/udf_row_sequence.q.out       |    6 +-
 data/scripts/input20_script.py                  |   18 +-
 .../test/resources/testconfiguration.properties |    2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   51 +
 ql/src/test/queries/clientpositive/concat_op.q  |    8 +-
 .../clientpositive/correlationoptimizer14.q     |    1 +
 .../clientpositive/groupby_distinct_samekey.q   |    1 +
 ql/src/test/queries/clientpositive/input20.q    |    8 +-
 ql/src/test/queries/clientpositive/input33.q    |    8 +-
 .../test/queries/clientpositive/input3_limit.q  |    6 +-
 ql/src/test/queries/clientpositive/ppd2.q       |    2 +
 .../reduce_deduplicate_extended.q               |    1 +
 .../clientpositive/truncate_column_buckets.q    |    4 +-
 .../results/clientpositive/auto_join0.q.out     |  109 +-
 .../results/clientpositive/auto_join15.q.out    |   47 +-
 .../results/clientpositive/auto_join20.q.out    |   94 +-
 .../results/clientpositive/auto_join31.q.out    |  286 +---
 .../clientpositive/cbo_rp_auto_join0.q.out      |   98 +-
 .../test/results/clientpositive/concat_op.q.out |   24 +-
 .../clientpositive/dynamic_rdd_cache.q.out      | 1431 ------------------
 .../groupby_distinct_samekey.q.out              |   45 +-
 .../identity_project_remove_skip.q.out          |  189 +--
 .../test/results/clientpositive/input20.q.out   |   21 +-
 .../test/results/clientpositive/input33.q.out   |   21 +-
 .../results/clientpositive/input3_limit.q.out   |   44 +-
 .../limit_pushdown_negative.q.out               |   96 +-
 .../clientpositive/llap/auto_join0.q.out        |   44 +-
 .../clientpositive/llap/auto_join30.q.out       |  615 +++-----
 .../clientpositive/llap/explainuser_1.q.out     |  521 +++----
 .../llap/identity_project_remove_skip.q.out     |   65 +-
 .../test/results/clientpositive/llap/mrr.q.out  |   59 +-
 .../clientpositive/llap/tez_join_tests.q.out    |   25 +-
 .../clientpositive/llap/tez_joins_explain.q.out |   25 +-
 .../clientpositive/llap/vector_join30.q.out     | 1167 +++++---------
 .../llap/vector_number_compare_projection.q.out |  190 +--
 .../clientpositive/multi_insert_gby2.q.out      |   40 +-
 .../clientpositive/multi_insert_gby3.q.out      |  189 +--
 ql/src/test/results/clientpositive/ppd2.q.out   |   52 +-
 .../test/results/clientpositive/ppd_join4.q.out |   23 +-
 .../clientpositive/spark/auto_join0.q.out       |   43 +-
 .../clientpositive/spark/auto_join15.q.out      |   35 +-
 .../clientpositive/spark/auto_join20.q.out      |   62 +-
 .../clientpositive/spark/auto_join30.q.out      |  793 ++++------
 .../clientpositive/spark/auto_join31.q.out      |  116 +-
 .../spark/dynamic_rdd_cache.q.out               |   65 +-
 .../spark/identity_project_remove_skip.q.out    |   70 +-
 .../spark/multi_insert_gby2.q.out               |   20 +-
 .../spark/multi_insert_gby3.q.out               |  126 +-
 .../spark_multi_insert_parallel_orderby.q.out   |  103 +-
 .../clientpositive/spark/tez_join_tests.q.out   |   26 +-
 .../spark/tez_joins_explain.q.out               |   26 +-
 .../spark/truncate_column_buckets.q.out         |    8 +-
 .../truncate_column_buckets.q.out               |    8 +-
 .../vector_tablesample_rows.q.out               |   77 +-
 55 files changed, 2006 insertions(+), 5210 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 34a663d..5700fb9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1608,6 +1608,8 @@ public class HiveConf extends Configuration {
     HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000, "Total number of samples to be obtained."),
     HIVESAMPLINGPERCENTFORORDERBY("hive.optimize.sampling.orderby.percent", 0.1f, new RatioValidator(),
         "Probability with which a row will be chosen."),
+    HIVE_REMOVE_ORDERBY_IN_SUBQUERY("hive.remove.orderby.in.subquery", true,
+        "If set to true, order/sort by without limit in sub queries will be removed."),
     HIVEOPTIMIZEDISTINCTREWRITE("hive.optimize.distinct.rewrite", true, "When applicable this "
         + "optimization rewrites distinct aggregates from a single stage to multi-stage "
         + "aggregation. This may not be optimal in all cases. Ideally, whether to trigger it or "

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
----------------------------------------------------------------------
diff --git a/contrib/src/test/results/clientpositive/udf_row_sequence.q.out b/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
index 9715c75..094a71a 100644
--- a/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
+++ b/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
@@ -39,11 +39,11 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
+              expressions: key (type: string), row_sequence() (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: row_sequence() (type: bigint)
+                key expressions: _col1 (type: bigint)
                 sort order: +
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/data/scripts/input20_script.py
----------------------------------------------------------------------
diff --git a/data/scripts/input20_script.py b/data/scripts/input20_script.py
index 40e3683..223fa2b 100644
--- a/data/scripts/input20_script.py
+++ b/data/scripts/input20_script.py
@@ -18,13 +18,13 @@
 #
 import sys
 import re
-line = sys.stdin.readline()
-x = 1
-while line:
-  tem = sys.stdin.readline()
-  if line == tem:
-    x = x + 1
+dict = {}
+for line in sys.stdin.readlines():
+  if dict.has_key(line):
+    x = dict[line]
+    dict[line] = x + 1
   else:
-    print str(x).strip()+'\t'+re.sub('\t','_',line.strip())
-    line = tem
-    x = 1
\ No newline at end of file
+    dict[line] = 1
+for key in dict:
+  x = dict[key]
+  print str(x).strip()+'\t'+re.sub('\t','_',key.strip())
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 59128e2..19ff316 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -908,7 +908,6 @@ spark.query.files=add_part_multiple.q, \
   decimal_1_1.q, \
   decimal_join.q, \
   disable_merge_for_bucketing.q, \
-  dynamic_rdd_cache.q, \
   enforce_order.q, \
   escape_clusterby1.q, \
   escape_distributeby1.q, \
@@ -1388,6 +1387,7 @@ spark.query.files=add_part_multiple.q, \
 spark.only.query.files=spark_combine_equivalent_work.q,\
   spark_dynamic_partition_pruning.q,\
   spark_dynamic_partition_pruning_2.q,\
+  dynamic_rdd_cache.q, \
   spark_multi_insert_parallel_orderby.q,\
   spark_explainuser_1.q,\
   spark_vectorized_dynamic_partition_pruning.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 699fcb4..01d19f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11256,6 +11256,51 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return genPlan(qb);
   }
 
+  private void removeOBInSubQuery(QBExpr qbExpr) {
+    if (qbExpr == null) {
+      return;
+    }
+
+    if (qbExpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+      QB subQB = qbExpr.getQB();
+      QBParseInfo parseInfo = subQB.getParseInfo();
+      String alias = qbExpr.getAlias();
+      Map<String, ASTNode> destToOrderBy = parseInfo.getDestToOrderBy();
+      Map<String, ASTNode> destToSortBy = parseInfo.getDestToSortBy();
+      final String warning = "WARNING: Order/Sort by without limit in sub query or view [" +
+          alias + "] is removed, as it's pointless and bad for performance.";
+      if (destToOrderBy != null) {
+        for (String dest : destToOrderBy.keySet()) {
+          if (parseInfo.getDestLimit(dest) == null) {
+            removeASTChild(destToOrderBy.get(dest));
+            destToOrderBy.remove(dest);
+            console.printInfo(warning);
+          }
+        }
+      }
+      if (destToSortBy != null) {
+        for (String dest : destToSortBy.keySet()) {
+          if (parseInfo.getDestLimit(dest) == null) {
+            removeASTChild(destToSortBy.get(dest));
+            destToSortBy.remove(dest);
+            console.printInfo(warning);
+          }
+        }
+      }
+    } else {
+      removeOBInSubQuery(qbExpr.getQBExpr1());
+      removeOBInSubQuery(qbExpr.getQBExpr2());
+    }
+  }
+
+  private static void removeASTChild(ASTNode node) {
+    Tree parent = node.getParent();
+    if (parent != null) {
+      parent.deleteChild(node.getChildIndex());
+      node.setParent(null);
+    }
+  }
+
   void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
     // 1. Generate Resolved Parse tree from syntax tree
     LOG.info("Starting Semantic Analysis");
@@ -11265,6 +11310,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       return;
     }
 
+    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
+      for (String alias : qb.getSubqAliases()) {
+        removeOBInSubQuery(qb.getSubqForAlias(alias));
+      }
+    }
+
     // 2. Gen OP Tree from resolved Parse Tree
     Operator sinkOp = genOPTree(ast, plannerCtx);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/concat_op.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/concat_op.q b/ql/src/test/queries/clientpositive/concat_op.q
index 8bbbb92..3fa0ce4 100644
--- a/ql/src/test/queries/clientpositive/concat_op.q
+++ b/ql/src/test/queries/clientpositive/concat_op.q
@@ -24,16 +24,16 @@ create table ct2 (c int);
 insert into ct1 values (7),(5),(3),(1);
 insert into ct2 values (8),(6),(4),(2);
 
-create view ct_v1 as select * from ct1 union all select * from ct2 order by c;
+create view ct_v1 as select * from ct1 union all select * from ct2;
 
-select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1;
+select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c;
 
 
 select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1
 		order by 'a' || c*c+c || 'b';
 
-select 'x' || (c&3),collect_list(c) from ct_v1
-		group by 'x' || (c&3);
+select 'x' || (c&3) from ct_v1
+		group by 'x' || (c&3) order by 'x' || (c&3);
 
 explain select concat('a','b','c');
 explain select 'a' || 'b' || 'c';

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/correlationoptimizer14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer14.q b/ql/src/test/queries/clientpositive/correlationoptimizer14.q
index 5547f25..7f191d2 100644
--- a/ql/src/test/queries/clientpositive/correlationoptimizer14.q
+++ b/ql/src/test/queries/clientpositive/correlationoptimizer14.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.reducededuplication=true;
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.optimize.correlation=true;
+set hive.remove.orderby.in.subquery=false;
 -- This file is used to show plans of queries involving cluster by, distribute by,
 -- order by, and sort by.
 -- Right now, Correlation optimizer check the most restrictive condition

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q b/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
index a012ae2..6a44dd1 100644
--- a/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
+++ b/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 -- This test covers HIVE-2332
+-- SORT_QUERY_RESULTS
 
 create table t1 (int1 int, int2 int, str1 string, str2 string);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/input20.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/input20.q b/ql/src/test/queries/clientpositive/input20.q
index ff430ab..3c2f78f 100644
--- a/ql/src/test/queries/clientpositive/input20.q
+++ b/ql/src/test/queries/clientpositive/input20.q
@@ -7,8 +7,7 @@ FROM (
   FROM src
   MAP src.key, src.key 
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -19,12 +18,11 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat' 
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
 USING 'python input20_script.py'
 AS key, value;
 
-SELECT * FROM dest1 SORT BY key, value;
+SELECT * FROM dest1 ORDER BY key, value;

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/input33.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/input33.q b/ql/src/test/queries/clientpositive/input33.q
index 8b6b215..3309045 100644
--- a/ql/src/test/queries/clientpositive/input33.q
+++ b/ql/src/test/queries/clientpositive/input33.q
@@ -7,8 +7,7 @@ FROM (
   FROM src
   MAP src.key, src.key 
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -19,12 +18,11 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat' 
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
 USING 'python input20_script.py'
 AS (key STRING, value STRING);
 
-SELECT * FROM dest1 SORT BY key, value;
+SELECT * FROM dest1 ORDER BY key, value;

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/input3_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/input3_limit.q b/ql/src/test/queries/clientpositive/input3_limit.q
index f983aca..3e9af60 100644
--- a/ql/src/test/queries/clientpositive/input3_limit.q
+++ b/ql/src/test/queries/clientpositive/input3_limit.q
@@ -7,11 +7,11 @@ LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T1;
 CREATE TABLE T2(key STRING, value STRING);
 
 EXPLAIN 
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20;
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20;
 
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20;
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20;
 
-SELECT * FROM T2 SORT BY key, value;
+SELECT * FROM T2 ORDER BY key, value;
 
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/ppd2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd2.q b/ql/src/test/queries/clientpositive/ppd2.q
index b955652..75eb6a8 100644
--- a/ql/src/test/queries/clientpositive/ppd2.q
+++ b/ql/src/test/queries/clientpositive/ppd2.q
@@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.ppd=true;
 set hive.ppd.remove.duplicatefilters=true;
 
+-- SORT_QUERY_RESULTS
+
 explain
 select b.key,b.cc
 from (

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
index 798dddc..8c9ff66 100644
--- a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
+++ b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.reducededuplication=true;
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.map.aggr=true;
+set hive.remove.orderby.in.subquery=false;
 
 -- HIVE-2340 deduplicate RS followed by RS
 -- hive.optimize.reducededuplication : wherther using this optimization

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/queries/clientpositive/truncate_column_buckets.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/truncate_column_buckets.q b/ql/src/test/queries/clientpositive/truncate_column_buckets.q
index 1cda1bf..c51a98f 100644
--- a/ql/src/test/queries/clientpositive/truncate_column_buckets.q
+++ b/ql/src/test/queries/clientpositive/truncate_column_buckets.q
@@ -11,7 +11,7 @@ INSERT OVERWRITE TABLE test_tab SELECT * FROM src;
 SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM 
 test_tab GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a;
+)a ORDER BY file_name DESC;
 
 -- Truncate a column on which the table is not bucketed
 TRUNCATE TABLE test_tab COLUMNS (value);
@@ -21,4 +21,4 @@ TRUNCATE TABLE test_tab COLUMNS (value);
 SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM 
 test_tab GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a;
+)a ORDER BY file_name DESC;

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/auto_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join0.q.out b/ql/src/test/results/clientpositive/auto_join0.q.out
index 77940b3..d15196e 100644
--- a/ql/src/test/results/clientpositive/auto_join0.q.out
+++ b/ql/src/test/results/clientpositive/auto_join0.q.out
@@ -1,5 +1,5 @@
-Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
-Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-6:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-6:MAPRED' is a cross product
 Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain 
 select sum(hash(a.k1,a.v1,a.k2, a.v2))
@@ -25,16 +25,15 @@ SELECT src1.key as k1, src1.value as v1,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2
+  Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-5, Stage-6
   Stage-9 has a backup stage: Stage-2
   Stage-6 depends on stages: Stage-9
-  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
-  Stage-4 depends on stages: Stage-3
-  Stage-10 has a backup stage: Stage-2
-  Stage-7 depends on stages: Stage-10
   Stage-2
-  Stage-5 is a root stage
-  Stage-0 depends on stages: Stage-4
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -66,10 +65,10 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-8
+  Stage: Stage-7
     Conditional Operator
 
-  Stage: Stage-9
+  Stage: Stage-8
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:$INTNAME1 
@@ -83,7 +82,7 @@ STAGE PLANS:
                 0 
                 1 
 
-  Stage: Stage-6
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -95,12 +94,17 @@ STAGE PLANS:
                 1 
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -109,31 +113,6 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-              sort order: ++++
-              Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
               sort order: 
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: bigint)
@@ -151,7 +130,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-10
+  Stage: Stage-9
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:$INTNAME 
@@ -165,7 +144,7 @@ STAGE PLANS:
                 0 
                 1 
 
-  Stage: Stage-7
+  Stage: Stage-6
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -177,12 +156,17 @@ STAGE PLANS:
                 1 
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -208,14 +192,19 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col0,_col1,_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -250,8 +239,8 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
-Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-6:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-6:MAPRED' is a cross product
 Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/auto_join15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join15.q.out b/ql/src/test/results/clientpositive/auto_join15.q.out
index 18f9b6a..fc4eb74 100644
--- a/ql/src/test/results/clientpositive/auto_join15.q.out
+++ b/ql/src/test/results/clientpositive/auto_join15.q.out
@@ -15,13 +15,12 @@ SORT BY k1, v1, k2, v2
 ) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-2 depends on stages: Stage-6
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-6
+  Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:src1 
@@ -61,38 +60,18 @@ STAGE PLANS:
                   expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                    sort order: ++++
-                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
           mode: mergepartial

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/auto_join20.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join20.q.out b/ql/src/test/results/clientpositive/auto_join20.q.out
index 9d97fe5..8aa2f9a 100644
--- a/ql/src/test/results/clientpositive/auto_join20.q.out
+++ b/ql/src/test/results/clientpositive/auto_join20.q.out
@@ -15,13 +15,12 @@ SORT BY k1,v1,k2,v2,k3,v3
 )a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-7 is a root stage
-  Stage-2 depends on stages: Stage-7
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-6 is a root stage
+  Stage-2 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-7
+  Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:src1 
@@ -88,38 +87,18 @@ STAGE PLANS:
                 expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
-                  sort order: ++++++
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
           mode: mergepartial
@@ -175,13 +154,12 @@ SORT BY k1,v1,k2,v2,k3,v3
 )a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-7 is a root stage
-  Stage-2 depends on stages: Stage-7
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-6 is a root stage
+  Stage-2 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-7
+  Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:src1 
@@ -248,38 +226,18 @@ STAGE PLANS:
                 expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
-                  sort order: ++++++
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
           mode: mergepartial

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/auto_join31.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out
index 1cf86cf..90aecae 100644
--- a/ql/src/test/results/clientpositive/auto_join31.q.out
+++ b/ql/src/test/results/clientpositive/auto_join31.q.out
@@ -21,224 +21,35 @@ ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2
-  Stage-9 has a backup stage: Stage-2
-  Stage-6 depends on stages: Stage-9
-  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
-  Stage-10 has a backup stage: Stage-2
-  Stage-7 depends on stages: Stage-10
-  Stage-2
-  Stage-4 is a root stage
-  Stage-5 is a root stage
-  Stage-0 depends on stages: Stage-3
+  Stage-6 is a root stage
+  Stage-2 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-8
-    Conditional Operator
-
-  Stage: Stage-9
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $INTNAME 
-          Fetch Operator
-            limit: -1
-        $INTNAME2 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $INTNAME 
-          TableScan
-            HashTable Sink Operator
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-        $INTNAME2 
-          TableScan
-            HashTable Sink Operator
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-
   Stage: Stage-6
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Map Join Operator
-              condition map:
-                   Right Outer Join 0 to 1
-                   Inner Join 0 to 2
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-              outputColumnNames: _col2, _col3
-              Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(hash(_col2,_col3))
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-10
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $INTNAME 
+        x:src 
           Fetch Operator
             limit: -1
-        $INTNAME1 
+        y:src 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $INTNAME 
-          TableScan
-            HashTable Sink Operator
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-        $INTNAME1 
-          TableScan
-            HashTable Sink Operator
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-
-  Stage: Stage-7
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Map Join Operator
-              condition map:
-                   Right Outer Join 0 to 1
-                   Inner Join 0 to 2
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-                2 _col0 (type: string)
-              outputColumnNames: _col2, _col3
-              Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(hash(_col2,_col3))
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string)
+        x:src 
           TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Right Outer Join 0 to 1
-               Inner Join 0 to 2
-          keys:
-            0 _col0 (type: string)
-            1 _col0 (type: string)
-            2 _col0 (type: string)
-          outputColumnNames: _col2, _col3
-          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(hash(_col2,_col3))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Map Reduce
-      Map Operator Tree:
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+        y:src 
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -246,49 +57,56 @@ STAGE PLANS:
               expressions: key (type: string), value (type: string)
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col1 (type: string)
-                sort order: +
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
 
-  Stage: Stage-5
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col1 (type: string)
-                sort order: +
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string)
+              Map Join Operator
+                condition map:
+                     Right Outer Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: string)
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out
index 942e447..3878bd3 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain 
 select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (
@@ -22,13 +22,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1,
 ) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-2 depends on stages: Stage-6
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-6
+  Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:cbo_t1:cbo_t3 
@@ -76,38 +75,18 @@ STAGE PLANS:
                     expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                      sort order: ++++
-                      Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
           mode: mergepartial
@@ -131,7 +110,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain 
 select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (
@@ -155,13 +134,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1,
 ) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-2 depends on stages: Stage-6
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-6
+  Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a:cbo_t1:cbo_t3 
@@ -209,38 +187,18 @@ STAGE PLANS:
                     expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                      sort order: ++++
-                      Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE
-          Group By Operator
-            aggregations: sum(hash(_col0,_col1,_col2,_col3))
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-              value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
           mode: mergepartial

http://git-wip-us.apache.org/repos/asf/hive/blob/b11e43b1/ql/src/test/results/clientpositive/concat_op.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out
index e7fad1f..17a0e31 100644
--- a/ql/src/test/results/clientpositive/concat_op.q.out
+++ b/ql/src/test/results/clientpositive/concat_op.q.out
@@ -160,26 +160,26 @@ POSTHOOK: query: insert into ct2 values (8),(6),(4),(2)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@ct2
 POSTHOOK: Lineage: ct2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c
+PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2
 PREHOOK: type: CREATEVIEW
 PREHOOK: Input: default@ct1
 PREHOOK: Input: default@ct2
 PREHOOK: Output: database:default
 PREHOOK: Output: default@ct_v1
-POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c
+POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2
 POSTHOOK: type: CREATEVIEW
 POSTHOOK: Input: default@ct1
 POSTHOOK: Input: default@ct2
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@ct_v1
 POSTHOOK: Lineage: ct_v1.c EXPRESSION [(ct1)ct1.FieldSchema(name:c, type:int, comment:null), (ct2)ct2.FieldSchema(name:c, type:int, comment:null), ]
-PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1
+PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ct1
 PREHOOK: Input: default@ct2
 PREHOOK: Input: default@ct_v1
 #### A masked pattern was here ####
-POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1
+POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ct1
 POSTHOOK: Input: default@ct2
@@ -215,24 +215,24 @@ POSTHOOK: Input: default@ct_v1
 7	x3	a56b
 2	x2	a6b
 8	x0	a72b
-PREHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1
-		group by 'x' || (c&3)
+PREHOOK: query: select 'x' || (c&3) from ct_v1
+		group by 'x' || (c&3) order by 'x' || (c&3)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ct1
 PREHOOK: Input: default@ct2
 PREHOOK: Input: default@ct_v1
 #### A masked pattern was here ####
-POSTHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1
-		group by 'x' || (c&3)
+POSTHOOK: query: select 'x' || (c&3) from ct_v1
+		group by 'x' || (c&3) order by 'x' || (c&3)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ct1
 POSTHOOK: Input: default@ct2
 POSTHOOK: Input: default@ct_v1
 #### A masked pattern was here ####
-x0	[4,8]
-x1	[1,5]
-x2	[2,6]
-x3	[3,7]
+x0
+x1
+x2
+x3
 PREHOOK: query: explain select concat('a','b','c')
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select concat('a','b','c')


Mime
View raw message