hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kevinwilf...@apache.org
Subject svn commit: r1411349 [1/2] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ test/queries/clientpositive/ test/results/clientpositive/
Date Mon, 19 Nov 2012 18:39:59 GMT
Author: kevinwilfong
Date: Mon Nov 19 18:39:57 2012
New Revision: 1411349

URL: http://svn.apache.org/viewvc?rev=1411349&view=rev
Log:
HIVE-3647. map-side groupby wrongly due to HIVE-3432. (njain via kevinwilfong)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_2.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_3.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_4.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_5.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_2.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_3.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_4.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_5.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_1.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_1.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1411349&r1=1411348&r2=1411349&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Mon Nov 19 18:39:57 2012
@@ -84,26 +84,26 @@ public class GroupByOptimizer implements
     if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
       // process group-by pattern
       opRules.put(new RuleRegExp("R1",
-        GroupByOperator.getOperatorName() + "%" +
-        ReduceSinkOperator.getOperatorName() + "%" +
-        GroupByOperator.getOperatorName() + "%"),
-        getMapSortedGroupbyProc(pctx));
+          GroupByOperator.getOperatorName() + "%" +
+              ReduceSinkOperator.getOperatorName() + "%" +
+              GroupByOperator.getOperatorName() + "%"),
+          getMapSortedGroupbyProc(pctx));
     } else {
       // If hive.groupby.skewindata is set to true, the operator tree is as below
       opRules.put(new RuleRegExp("R2",
-        GroupByOperator.getOperatorName() + "%" +
-        ReduceSinkOperator.getOperatorName() + "%" +
-        GroupByOperator.getOperatorName() + "%" +
-        ReduceSinkOperator.getOperatorName() + "%" +
-        GroupByOperator.getOperatorName() + "%"),
-        getMapSortedGroupbySkewProc(pctx));
+          GroupByOperator.getOperatorName() + "%" +
+              ReduceSinkOperator.getOperatorName() + "%" +
+              GroupByOperator.getOperatorName() + "%" +
+              ReduceSinkOperator.getOperatorName() + "%" +
+              GroupByOperator.getOperatorName() + "%"),
+          getMapSortedGroupbySkewProc(pctx));
     }
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp =
-      new DefaultRuleDispatcher(getDefaultProc(), opRules,
-      new GroupByOptimizerContext(conf));
+        new DefaultRuleDispatcher(getDefaultProc(), opRules,
+            new GroupByOptimizerContext(conf));
     GraphWalker ogw = new DefaultGraphWalker(disp);
 
     // Create a list of topop nodes
@@ -118,7 +118,7 @@ public class GroupByOptimizer implements
     return new NodeProcessor() {
       @Override
       public Object process(Node nd, Stack<Node> stack,
-        NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+          NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
         return null;
       }
     };
@@ -136,6 +136,10 @@ public class GroupByOptimizer implements
     NO_MATCH, PARTIAL_MATCH, COMPLETE_MATCH
   };
 
+  private enum ColumnOrderMatch {
+    NO_MATCH, PREFIX_COL1_MATCH, PREFIX_COL2_MATCH, COMPLETE_MATCH
+  };
+
   /**
    * SortGroupByProcessor.
    *
@@ -150,8 +154,8 @@ public class GroupByOptimizer implements
 
     // Check if the group by operator has already been processed
     protected boolean checkGroupByOperatorProcessed(
-      GroupByOptimizerContext groupBySortOptimizerContext,
-      GroupByOperator groupByOp) {
+        GroupByOptimizerContext groupBySortOptimizerContext,
+        GroupByOperator groupByOp) {
 
       // The group by operator has already been processed
       if (groupBySortOptimizerContext.getListGroupByOperatorsProcessed().contains(groupByOp)) {
@@ -163,21 +167,19 @@ public class GroupByOptimizer implements
     }
 
     protected void processGroupBy(GroupByOptimizerContext ctx,
-      Stack<Node> stack,
-      GroupByOperator groupByOp,
-      int depth) throws SemanticException {
+        Stack<Node> stack,
+        GroupByOperator groupByOp,
+        int depth) throws SemanticException {
       HiveConf hiveConf = ctx.getConf();
       GroupByOptimizerSortMatch match = checkSortGroupBy(stack, groupByOp);
       boolean useMapperSort =
-        HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT);
+          HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT);
 
-      if (useMapperSort) {
-        if (match == GroupByOptimizerSortMatch.COMPLETE_MATCH) {
-          convertGroupByMapSideSortedGroupBy(groupByOp, depth);
-        }
+      if (useMapperSort && (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
+        convertGroupByMapSideSortedGroupBy(groupByOp, depth);
       }
       else if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
-        (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
+          (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
         groupByOp.getConf().setBucketGroup(true);
       }
     }
@@ -188,7 +190,7 @@ public class GroupByOptimizer implements
       // GBY,RS,GBY... (top to bottom)
       GroupByOperator groupByOp = (GroupByOperator) stack.get(stack.size() - 3);
 
-      GroupByOptimizerContext ctx = (GroupByOptimizerContext)procCtx;
+      GroupByOptimizerContext ctx = (GroupByOptimizerContext) procCtx;
 
       if (!checkGroupByOperatorProcessed(ctx, groupByOp)) {
         processGroupBy(ctx, stack, groupByOp, 2);
@@ -199,8 +201,8 @@ public class GroupByOptimizer implements
     // Should this group by be converted to a map-side group by, because the grouping keys for
     // the base table for the group by matches the skewed keys
     protected GroupByOptimizerSortMatch checkSortGroupBy(Stack<Node> stack,
-      GroupByOperator groupByOp)
-      throws SemanticException {
+        GroupByOperator groupByOp)
+        throws SemanticException {
 
       // if this is not a HASH groupby, return
       if (groupByOp.getConf().getMode() != GroupByDesc.Mode.HASH) {
@@ -226,7 +228,7 @@ public class GroupByOptimizer implements
       }
 
       // currOp now points to the top-most tablescan operator
-      TableScanOperator tableScanOp = (TableScanOperator)currOp;
+      TableScanOperator tableScanOp = (TableScanOperator) currOp;
       int stackPos = 0;
       assert stack.get(0) == tableScanOp;
 
@@ -241,11 +243,11 @@ public class GroupByOptimizer implements
       while (currOp != groupByOp) {
         Operator<? extends OperatorDesc> processOp = currOp;
         Set<String> newConstantCols = new HashSet<String>();
-        currOp = (Operator<? extends OperatorDesc>)(stack.get(++stackPos));
+        currOp = (Operator<? extends OperatorDesc>) (stack.get(++stackPos));
 
         // Filters don't change the column names - so, no need to do anything for them
         if (processOp instanceof SelectOperator) {
-          SelectOperator selectOp = (SelectOperator)processOp;
+          SelectOperator selectOp = (SelectOperator) processOp;
           SelectDesc selectDesc = selectOp.getConf();
 
           if (selectDesc.isSelStarNoCompute()) {
@@ -264,7 +266,7 @@ public class GroupByOptimizer implements
             ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
             if (selectColList instanceof ExprNodeColumnDesc) {
               String newValue =
-                tableColsMapping.get(((ExprNodeColumnDesc) selectColList).getColumn());
+                  tableColsMapping.get(((ExprNodeColumnDesc) selectColList).getColumn());
               tableColsMapping.put(outputColumnName, newValue);
             }
             else {
@@ -287,7 +289,7 @@ public class GroupByOptimizer implements
       // the sorting property is not obeyed
       for (ExprNodeDesc expr : groupByOp.getConf().getKeys()) {
         if (expr instanceof ExprNodeColumnDesc) {
-          String groupByKeyColumn = ((ExprNodeColumnDesc)expr).getColumn();
+          String groupByKeyColumn = ((ExprNodeColumnDesc) expr).getColumn();
           // ignore if it is a constant
           if (constantCols.contains(groupByKeyColumn)) {
             continue;
@@ -303,7 +305,7 @@ public class GroupByOptimizer implements
         }
         // Constants and nulls are OK
         else if ((expr instanceof ExprNodeConstantDesc) ||
-          (expr instanceof ExprNodeNullDesc)) {
+            (expr instanceof ExprNodeNullDesc)) {
           continue;
         } else {
           return GroupByOptimizerSortMatch.NO_MATCH;
@@ -312,17 +314,18 @@ public class GroupByOptimizer implements
 
       if (!table.isPartitioned()) {
         List<String> sortCols = Utilities.getColumnNamesFromSortCols(table.getSortCols());
-        return matchSortColumns(groupByCols, sortCols);
+        List<String> bucketCols = table.getBucketCols();
+        return matchBucketSortCols(groupByCols, bucketCols, sortCols);
       } else {
         PrunedPartitionList partsList = null;
         try {
           partsList = pGraphContext.getOpToPartList().get(tableScanOp);
           if (partsList == null) {
             partsList = PartitionPruner.prune(table,
-              pGraphContext.getOpToPartPruner().get(tableScanOp),
-              pGraphContext.getConf(),
-              table.getTableName(),
-              pGraphContext.getPrunedPartitions());
+                pGraphContext.getOpToPartPruner().get(tableScanOp),
+                pGraphContext.getConf(),
+                table.getTableName(),
+                pGraphContext.getPrunedPartitions());
             pGraphContext.getOpToPartList().put(tableScanOp, partsList);
           }
         } catch (HiveException e) {
@@ -333,7 +336,8 @@ public class GroupByOptimizer implements
         GroupByOptimizerSortMatch currentMatch = GroupByOptimizerSortMatch.COMPLETE_MATCH;
         for (Partition part : partsList.getNotDeniedPartns()) {
           List<String> sortCols = part.getSortColNames();
-          GroupByOptimizerSortMatch match = matchSortColumns(groupByCols, sortCols);
+          List<String> bucketCols = part.getBucketCols();
+          GroupByOptimizerSortMatch match = matchBucketSortCols(groupByCols, bucketCols, sortCols);
           if (match == GroupByOptimizerSortMatch.NO_MATCH) {
             return match;
           }
@@ -346,34 +350,100 @@ public class GroupByOptimizer implements
       }
     }
 
+    /*
+     * Return how the two lists of columns passed in match, comparing them position by position.
+     * Return NO_MATCH if either of the lists is empty or null, or if there is a mismatch.
+     * For eg: ([], []), ([], ["a"]), (["a"],["b"]) and (["a", "b"], ["a","c"]) return NO_MATCH
+     *
+     * Return COMPLETE_MATCH if both the lists are non-empty and are same
+     * Return PREFIX_COL1_MATCH if list1 is a strict prefix of list2 and
+     * return PREFIX_COL2_MATCH if list2 is a strict prefix of list1
+     *
+     * For eg: (["a"], ["a"]), (["a"], ["a", "b"]) and (["a", "b"], ["a"]) return
+     * COMPLETE_MATCH, PREFIX_COL1_MATCH and PREFIX_COL2_MATCH respectively.
+     */
+    private ColumnOrderMatch matchColumnOrder(List<String> cols1, List<String> cols2) {
+      int numCols1 = cols1 == null ? 0 : cols1.size();
+      int numCols2 = cols2 == null ? 0 : cols2.size();
+
+      if (numCols1 == 0 || numCols2 == 0) {
+        return ColumnOrderMatch.NO_MATCH;
+      }
+
+      for (int pos = 0; pos < Math.min(numCols1, numCols2); pos++) {
+        if (!cols1.get(pos).equals(cols2.get(pos))) {
+          return ColumnOrderMatch.NO_MATCH;
+        }
+      }
+
+      return (numCols1 == numCols2) ?
+          ColumnOrderMatch.COMPLETE_MATCH :
+          ((numCols1 < numCols2) ? ColumnOrderMatch.PREFIX_COL1_MATCH :
+              ColumnOrderMatch.PREFIX_COL2_MATCH);
+    }
+
     /**
-     * Given the group by keys, sort columns, this method
+     * Given the group by keys, bucket columns and sort columns, this method
      * determines if we can use sorted group by or not.
-     * We can use map-side sort group by group by columns match the sorted columns
-     * in exactly the same order.
      *
      * @param groupByCols
+     * @param bucketCols
      * @param sortCols
      * @return
      * @throws SemanticException
      */
-    private GroupByOptimizerSortMatch matchSortColumns(
-      List<String> groupByCols,
-      List<String> sortCols) throws SemanticException {
-
-      if (sortCols == null || sortCols.size() == 0) {
+    private GroupByOptimizerSortMatch matchBucketSortCols(
+        List<String> groupByCols,
+        List<String> bucketCols,
+        List<String> sortCols) throws SemanticException {
+
+      /*
+       * Notation: ">>" means "is a superset of".
+       * If the grouping columns are a,b,c and the sorting columns are a,b
+       * grouping columns >> sorting columns
+       * (i.e. the sorting columns are a strict, in-order prefix of the grouping columns)
+       *
+       * Similarly "<<" means "is a subset of" (a strict, in-order prefix of)
+       *
+       * No intersection between Sort Columns and BucketCols:
+       *
+       * 1. Sort Cols = Group By Cols ---> Partial Match
+       * 2. Group By Cols >> Sort By Cols --> No Match
+       * 3. Group By Cols << Sort By Cols --> Partial Match
+       *
+       * BucketCols <= SortCols (bucket columns is either same or a prefix of sort columns)
+       *
+       * 1. Sort Cols = Group By Cols ---> Complete Match
+       * 2. Group By Cols >> Sort By Cols --> No Match
+       * 3. Group By Cols << Sort By Cols --> Complete Match if Group By Cols >= BucketCols
+       * --> Partial Match otherwise
+       *
+       * BucketCols >> SortCols (bucket columns is a superset of sorting columns)
+       *
+       * 1. group by cols <= sort cols --> partial match
+       * 2. group by cols >> sort cols --> no match
+       *
+       * One exception to this rule is:
+       * If GroupByCols == SortCols and all bucketing columns are part of sorting columns
+       * (in any order), it is a complete match
+       */
+      ColumnOrderMatch bucketSortColsMatch = matchColumnOrder(bucketCols, sortCols);
+      ColumnOrderMatch sortGroupByColsMatch = matchColumnOrder(sortCols, groupByCols);
+      switch (sortGroupByColsMatch) {
+      case NO_MATCH:
         return GroupByOptimizerSortMatch.NO_MATCH;
+      case COMPLETE_MATCH:
+        return ((bucketCols != null) && !bucketCols.isEmpty() && sortCols.containsAll(bucketCols)) ?
+          GroupByOptimizerSortMatch.COMPLETE_MATCH : GroupByOptimizerSortMatch.PARTIAL_MATCH;
+      case PREFIX_COL1_MATCH:
+        return GroupByOptimizerSortMatch.NO_MATCH;
+      case PREFIX_COL2_MATCH:
+        return ((bucketSortColsMatch == ColumnOrderMatch.NO_MATCH) ||
+            (bucketCols.size() > groupByCols.size())) ?
+            GroupByOptimizerSortMatch.PARTIAL_MATCH :
+            GroupByOptimizerSortMatch.COMPLETE_MATCH;
       }
-
-      int num = sortCols.size() <  groupByCols.size() ? sortCols.size() : groupByCols.size();
-      for (int i = 0; i < num; i++) {
-        if (!sortCols.get(i).equals(groupByCols.get(i))) {
-          return GroupByOptimizerSortMatch.NO_MATCH;
-        }
-      }
-
-      return sortCols.size() == groupByCols.size() ?
-        GroupByOptimizerSortMatch.COMPLETE_MATCH : GroupByOptimizerSortMatch.PARTIAL_MATCH;
+      return GroupByOptimizerSortMatch.NO_MATCH;
     }
 
     // Convert the group by to a map-side group by
@@ -401,7 +471,7 @@ public class GroupByOptimizer implements
         Object... nodeOutputs) throws SemanticException {
       // GBY,RS,GBY,RS,GBY... (top to bottom)
       GroupByOperator groupByOp = (GroupByOperator) stack.get(stack.size() - 5);
-      GroupByOptimizerContext ctx = (GroupByOptimizerContext)procCtx;
+      GroupByOptimizerContext ctx = (GroupByOptimizerContext) procCtx;
 
       if (!checkGroupByOperatorProcessed(ctx, groupByOp)) {
         processGroupBy(ctx, stack, groupByOp, 4);
@@ -424,7 +494,7 @@ public class GroupByOptimizer implements
     }
 
     public void setListGroupByOperatorsProcessed(
-      List<GroupByOperator> listGroupByOperatorsProcessed) {
+        List<GroupByOperator> listGroupByOperatorsProcessed) {
       this.listGroupByOperatorsProcessed = listGroupByOperatorsProcessed;
     }
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_1.q?rev=1411349&r1=1411348&r2=1411349&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_1.q Mon Nov 19 18:39:57 2012
@@ -14,7 +14,7 @@ INSERT OVERWRITE TABLE T1 select key, va
 CREATE TABLE outputTbl1(key int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key
--- matches the skewed key
+-- matches the sorted key
 -- addind a order by at the end to make the test results deterministic
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
@@ -27,7 +27,7 @@ SELECT * FROM outputTbl1 ORDER BY key;
 
 CREATE TABLE outputTbl2(key1 int, key2 string, cnt int);
 
--- no map-side group by even if the group by key is a superset of skewed key
+-- no map-side group by even if the group by key is a superset of sorted key
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val;
@@ -60,7 +60,7 @@ SELECT * FROM outputTbl1 ORDER BY key;
 CREATE TABLE outputTbl3(key1 int, key2 int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant followed
--- by a match to the skewed key
+-- by a match to the sorted key
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl3
 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;
@@ -188,7 +188,7 @@ SELECT key, count(1) FROM T2 GROUP BY ke
 SELECT * FROM outputTbl1 ORDER BY key;
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys
+-- sorted keys
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl4
 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val;
@@ -201,7 +201,7 @@ SELECT * FROM outputTbl4 ORDER BY key1, 
 CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys followed by anything
+-- sorted keys followed by anything
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl5
 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2;

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_2.q?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_2.q Mon Nov 19 18:39:57 2012
@@ -0,0 +1,25 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (val) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+CREATE TABLE outputTbl1(val string, cnt int);
+
+-- The plan should not be converted to a map-side group by even though the group by key
+-- matches the sorted key. Adding a order by at the end to make the test results deterministic
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val;
+
+SELECT * FROM outputTbl1 ORDER BY val;

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_3.q?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_3.q Mon Nov 19 18:39:57 2012
@@ -0,0 +1,36 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+CREATE TABLE outputTbl1(key string, val string, cnt int);
+
+-- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+SELECT * FROM outputTbl1 ORDER BY key, val;
+
+CREATE TABLE outputTbl2(key string, cnt int);
+
+-- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+SELECT * FROM outputTbl2 ORDER BY key;

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_4.q?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_4.q Mon Nov 19 18:39:57 2012
@@ -0,0 +1,38 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key, val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+CREATE TABLE outputTbl1(key STRING, cnt INT);
+
+-- The plan should not be converted to a map-side group by.
+-- However, there should no hash-based aggregation on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+SELECT * FROM outputTbl1 ORDER BY key;
+
+CREATE TABLE outputTbl2(key STRING, val STRING, cnt INT);
+
+-- The plan should not be converted to a map-side group by.
+-- Hash-based aggregations should be performed on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+SELECT * FROM outputTbl2 ORDER BY key, val;

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_5.q?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_5.q Mon Nov 19 18:39:57 2012
@@ -0,0 +1,75 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT);
+
+-- The plan should be converted to a map-side group by, since the
+-- sorting columns and grouping columns match, and all the bucketing columns
+-- are part of sorting columns
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+SELECT * FROM outputTbl1 ORDER BY key, val;
+
+DROP TABLE T1;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (val, key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+-- The plan should be converted to a map-side group by, since the
+-- sorting columns and grouping columns match, and all the bucketing columns
+-- are part of sorting columns
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+SELECT * FROM outputTbl1 ORDER BY key, val;
+
+DROP TABLE T1;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1;
+
+CREATE TABLE outputTbl2(key STRING, cnt INT);
+
+-- The plan should not be converted to a map-side group by, since although the
+-- sorting columns and grouping columns match, all the bucketing columns
+-- are not part of sorting columns. However, no hash map aggregation is required
+-- on the mapside.
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key;
+
+SELECT * FROM outputTbl2 ORDER BY key;
+
+DROP TABLE T1;

Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q?rev=1411349&r1=1411348&r2=1411349&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q Mon Nov 19 18:39:57 2012
@@ -15,7 +15,7 @@ INSERT OVERWRITE TABLE T1 select key, va
 CREATE TABLE outputTbl1(key int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key
--- matches the skewed key
+-- matches the sorted key
 -- addind a order by at the end to make the test results deterministic
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
@@ -28,7 +28,7 @@ SELECT * FROM outputTbl1 ORDER BY key;
 
 CREATE TABLE outputTbl2(key1 int, key2 string, cnt int);
 
--- no map-side group by even if the group by key is a superset of skewed key
+-- no map-side group by even if the group by key is a superset of sorted key
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val;
@@ -61,7 +61,7 @@ SELECT * FROM outputTbl1 ORDER BY key;
 CREATE TABLE outputTbl3(key1 int, key2 int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant followed
--- by a match to the skewed key
+-- by a match to the sorted key
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl3
 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;
@@ -189,7 +189,7 @@ SELECT key, count(1) FROM T2 GROUP BY ke
 SELECT * FROM outputTbl1 ORDER BY key;
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys
+-- sorted keys
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl4
 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val;
@@ -202,7 +202,7 @@ SELECT * FROM outputTbl4 ORDER BY key1, 
 CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int);
 
 -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys followed by anything
+-- sorted keys followed by anything
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl5
 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2;

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_1.q.out?rev=1411349&r1=1411348&r2=1411349&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_1.q.out Mon Nov 19 18:39:57 2012
@@ -31,14 +31,14 @@ POSTHOOK: Output: default@outputTbl1
 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key
--- matches the skewed key
+-- matches the sorted key
 -- addind a order by at the end to make the test results deterministic
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
 SELECT key, count(1) FROM T1 GROUP BY key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key
--- matches the skewed key
+-- matches the sorted key
 -- addind a order by at the end to make the test results deterministic
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
@@ -234,12 +234,12 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRES
 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
-PREHOOK: query: -- no map-side group by even if the group by key is a superset of skewed key
+PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val
 PREHOOK: type: QUERY
-POSTHOOK: query: -- no map-side group by even if the group by key is a superset of skewed key
+POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val
@@ -935,13 +935,13 @@ POSTHOOK: Lineage: outputtbl2.key2 SIMPL
 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
--- by a match to the skewed key
+-- by a match to the sorted key
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl3
 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
--- by a match to the skewed key
+-- by a match to the sorted key
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl3
 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
@@ -3772,7 +3772,7 @@ STAGE PLANS:
               Group By Operator
                 aggregations:
                       expr: count(1)
-                bucketGroup: false
+                bucketGroup: true
                 keys:
                       expr: key
                       type: string
@@ -4018,13 +4018,13 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1
 7	1
 8	2
 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys
+-- sorted keys
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl4
 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys
+-- sorted keys
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl4
 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val
@@ -4375,13 +4375,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1
 POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys followed by anything
+-- sorted keys followed by anything
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl5
 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the
--- skewed keys followed by anything
+-- sorted keys followed by anything
 EXPLAIN EXTENDED 
 INSERT OVERWRITE TABLE outputTbl5
 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2
@@ -5645,7 +5645,7 @@ STAGE PLANS:
               Group By Operator
                 aggregations:
                       expr: count(1)
-                bucketGroup: false
+                bucketGroup: true
                 keys:
                       expr: key
                       type: string
@@ -6076,7 +6076,7 @@ STAGE PLANS:
                   Group By Operator
                     aggregations:
                           expr: count(1)
-                    bucketGroup: false
+                    bucketGroup: true
                     keys:
                           expr: _col0
                           type: string

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_2.q.out?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_2.q.out Mon Nov 19 18:39:57 2012
@@ -0,0 +1,166 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (val) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (val) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE outputTbl1(val string, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE outputTbl1(val string, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- The plan should not be converted to a map-side group by even though the group by key
+-- matches the sorted key. Adding an order by at the end to make the test results deterministic
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should not be converted to a map-side group by even though the group by key
+-- matches the sorted key. Adding an order by at the end to make the test results deterministic
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: val
+                    type: string
+              outputColumnNames: val
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: true
+                keys:
+                      expr: val
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT val, count(1) FROM T1 GROUP BY val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+11	1
+12	1
+13	1
+17	1
+18	1
+28	1

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_3.q.out?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_3.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_3.q.out Mon Nov 19 18:39:57 2012
@@ -0,0 +1,280 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE outputTbl1(key string, val string, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE outputTbl1(key string, val string, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: val
+                    type: string
+              outputColumnNames: key, val
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                        expr: _col2
+                        type: bigint
+                  outputColumnNames: _col0, _col1, _col2
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                          expr: UDFToInteger(_col2)
+                          type: int
+                    outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+1	11	1
+2	12	1
+3	13	1
+7	17	1
+8	18	1
+8	28	1
+PREHOOK: query: CREATE TABLE outputTbl2(key string, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE outputTbl2(key string, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl2
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should be converted to a map-side group by
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: final
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToInteger(_col1)
+                          type: int
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl2 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl2 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+1	1
+2	1
+3	1
+7	1
+8	2

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_4.q.out?rev=1411349&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_4.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_4.q.out Mon Nov 19 18:39:57 2012
@@ -0,0 +1,330 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key, val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key, val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE outputTbl1(key STRING, cnt INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE outputTbl1(key STRING, cnt INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- The plan should not be converted to a map-side group by.
+-- However, there should be no hash-based aggregation on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should not be converted to a map-side group by.
+-- However, there should be no hash-based aggregation on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: true
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+1	1
+2	1
+3	1
+7	1
+8	2
+PREHOOK: query: CREATE TABLE outputTbl2(key STRING, val STRING, cnt INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE outputTbl2(key STRING, val STRING, cnt INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl2
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- The plan should not be converted to a map-side group by.
+-- Hash-based aggregations should be performed on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should not be converted to a map-side group by.
+-- Hash-based aggregations should be performed on the map-side
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: val
+                    type: string
+              outputColumnNames: key, val
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col2
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: UDFToInteger(_col2)
+                    type: int
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl2 ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl2 ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+1	11	1
+2	12	1
+3	13	1
+7	17	1
+8	18	1
+8	28	1



Mime
View raw message