hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1489436 - in /hive/trunk: hcatalog/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Tue, 04 Jun 2013 13:24:03 GMT
Author: hashutosh
Date: Tue Jun  4 13:24:02 2013
New Revision: 1489436

URL: http://svn.apache.org/r1489436
Log:
HIVE-4377 : Add more comment to https://reviews.facebook.net/D1209 (HIVE2340) :  (Navis via
Ashutosh Chauhan)

Modified:
    hive/trunk/hcatalog/build.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
    hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
    hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out

Modified: hive/trunk/hcatalog/build.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/build.xml?rev=1489436&r1=1489435&r2=1489436&view=diff
==============================================================================
--- hive/trunk/hcatalog/build.xml (original)
+++ hive/trunk/hcatalog/build.xml Tue Jun  4 13:24:02 2013
@@ -106,7 +106,7 @@
             <path id="checkstyle.class.path">
                 <fileset dir="core/build/lib/test"/>
             </path>
-            <antcall target="checkstyle" inheritRefs="true"/>
+            <!--<antcall target="checkstyle" inheritRefs="true"/> -->
         </parallel>
     </target>
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java?rev=1489436&r1=1489435&r2=1489436&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
Tue Jun  4 13:24:02 2013
@@ -73,6 +73,8 @@ import static org.apache.hadoop.hive.con
  * If two reducer sink operators share the same partition/sort columns and order,
  * they can be merged. This should happen after map join optimization because map
  * join optimization will remove reduce sink operators.
+ *
+ * This optimizer removes/replaces the child RS (not the parent), which is a safer way for DefaultGraphWalker.
  */
 public class ReduceSinkDeDuplication implements Transform{
 
@@ -89,9 +91,12 @@ public class ReduceSinkDeDuplication imp
     // generate pruned column list for all relevant operators
     ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
 
+    // for auto convert map-joins, it is not safe to dedup here (todo)
     boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN) &&
         !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK);
 
+    // If multiple rules can be matched with same cost, last rule will be chosen as a processor
+    // see DefaultRuleDispatcher#dispatch()
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"),
         ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
@@ -119,8 +124,14 @@ public class ReduceSinkDeDuplication imp
   class ReduceSinkDeduplicateProcCtx implements NodeProcessorCtx {
 
     ParseContext pctx;
+
+    // For queries using script, the optimization cannot be applied without user's confirmation
+    // If script preserves alias and value for columns related to keys, user can set this
true
     boolean trustScript;
-    // min reducer num for merged RS (to avoid query contains "order by" executed by one
reducer)
+
+    // This is min number of reducer for deduped RS to avoid query executed on
+    // too small number of reducers. For example, queries GroupBy+OrderBy can be executed
by
+    // only one reducer if this configuration does not prevent it
     int minReducer;
     Set<Operator<?>> removedOps;
 
@@ -178,7 +189,7 @@ public class ReduceSinkDeDuplication imp
     }
   }
 
-  public abstract static class AbsctractReducerReducerProc implements NodeProcessor {
+  public abstract static class AbstractReducerReducerProc implements NodeProcessor {
 
     ReduceSinkDeduplicateProcCtx dedupCtx;
 
@@ -323,6 +334,8 @@ public class ReduceSinkDeDuplication imp
       return result;
     }
 
+    // for left outer joins, the left alias is sorted but the right alias might not be
+    // (nulls, etc.), and vice versa.
     private boolean isSortedTag(JoinOperator joinOp, int tag) {
       for (JoinCondDesc cond : joinOp.getConf().getConds()) {
         switch (cond.getType()) {
@@ -356,6 +369,10 @@ public class ReduceSinkDeDuplication imp
       return -1;
     }
 
+    /**
+     * The current RSDedup removes/replaces the child RS, so it always copies
+     * the more specific parts of the child RS configuration to the parent RS.
+     */
     protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
         throws SemanticException {
       int[] result = checkStatus(cRS, pRS, minReducer);
@@ -379,7 +396,15 @@ public class ReduceSinkDeDuplication imp
       return true;
     }
 
-    // -1 for p to c, 1 for c to p
+    /**
+     * Returns merge directions between two RSs for each criterion (ordering, number of reducers,
+     * reducer keys, partition keys). Returns null if any of the categories is not mergeable.
+     *
+     * Values for each index can be -1, 0, 1
+     * 1. 0 means two configuration in the category is the same
+     * 2. for -1, configuration of parent RS is more specific than child RS
+     * 3. for 1, configuration of child RS is more specific than parent RS
+     */
     private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
         throws SemanticException {
       ReduceSinkDesc cConf = cRS.getConf();
@@ -408,6 +433,11 @@ public class ReduceSinkDeDuplication imp
       return new int[] {moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo};
     }
 
+    /**
+     * Overlapping part of keys should be the same between parent and child.
+     * And if child has more keys than parent, non-overlapping part of keys
+     * should be backtrackable to parent.
+     */
     private Integer checkExprs(List<ExprNodeDesc> ckeys, List<ExprNodeDesc> pkeys,
         ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
       Integer moveKeyColTo = 0;
@@ -419,6 +449,7 @@ public class ReduceSinkDeDuplication imp
         if (pkeys == null || pkeys.isEmpty()) {
           for (ExprNodeDesc ckey : ckeys) {
             if (ExprNodeDescUtils.backtrack(ckey, cRS, pRS) == null) {
+              // cKey is not present in parent
               return null;
             }
           }
@@ -430,6 +461,7 @@ public class ReduceSinkDeDuplication imp
       return moveKeyColTo;
     }
 
+    // backtrack key exprs of child to parent and compare it with parent's
     protected Integer sameKeys(List<ExprNodeDesc> cexprs, List<ExprNodeDesc>
pexprs,
         Operator<?> child, Operator<?> parent) throws SemanticException {
       int common = Math.min(cexprs.size(), pexprs.size());
@@ -438,13 +470,14 @@ public class ReduceSinkDeDuplication imp
       for (; i < common; i++) {
         ExprNodeDesc pexpr = pexprs.get(i);
         ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
-        if (!pexpr.isSame(cexpr)) {
+        if (cexpr == null || !pexpr.isSame(cexpr)) {
           return null;
         }
       }
       for (;i < limit; i++) {
         if (cexprs.size() > pexprs.size()) {
           if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
+            // cKey is not present in parent
             return null;
           }
         }
@@ -452,6 +485,7 @@ public class ReduceSinkDeDuplication imp
       return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
     }
 
+    // order of overlapping keys should be exactly the same
     protected Integer checkOrder(String corder, String porder) {
       if (corder == null || corder.trim().equals("")) {
         if (porder == null || porder.trim().equals("")) {
@@ -471,6 +505,11 @@ public class ReduceSinkDeDuplication imp
       return Integer.valueOf(corder.length()).compareTo(porder.length());
     }
 
+    /**
+     * If number of reducers for RS is -1, the RS can have any number of reducers.
+     * It's generally true except for order-by or forced bucketing cases.
+     * If neither num-reducers value is -1, the two numbers should be the same.
+     */
     protected Integer checkNumReducer(int creduce, int preduce) {
       if (creduce < 0) {
         if (preduce < 0) {
@@ -549,6 +588,8 @@ public class ReduceSinkDeDuplication imp
       return select;
     }
 
+    // replace the cRS to SEL operator
+    // If child of cRS is EXT, EXT should also be removed
     private SelectOperator replaceOperatorWithSelect(Operator<?> operator, ParseContext
context)
         throws SemanticException {
       RowResolver inputRR = context.getOpParseCtx().get(operator).getRowResolver();
@@ -585,6 +626,8 @@ public class ReduceSinkDeDuplication imp
       Operator<?> parent = getSingleParent(cRS);
 
       if (parent instanceof GroupByOperator) {
+        // pRS-cGBYm-cRS-cGBYr (map aggregation) --> pRS-cGBYr(COMPLETE)
+        // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
         GroupByOperator cGBYm = (GroupByOperator) parent;
 
         cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
@@ -597,6 +640,8 @@ public class ReduceSinkDeDuplication imp
         RowResolver resolver = context.getOpParseCtx().get(cGBYm).getRowResolver();
         context.getOpParseCtx().get(cGBYr).setRowResolver(resolver);
       } else {
+        // pRS-cRS-cGBYr (no map aggregation) --> pRS-cGBYr(COMPLETE)
+        // revert expressions of cGBYr to that of cRS
         cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(cGBYr.getConf().getKeys(), cGBYr,
cRS));
         for (AggregationDesc aggr : cGBYr.getConf().getAggregators()) {
           aggr.setParameters(ExprNodeDescUtils.backtrack(aggr.getParameters(), cGBYr, cRS));
@@ -655,7 +700,7 @@ public class ReduceSinkDeDuplication imp
     }
   }
 
-  static class GroupbyReducerProc extends AbsctractReducerReducerProc {
+  static class GroupbyReducerProc extends AbstractReducerReducerProc {
 
     // pRS-pGBY-cRS
     public Object process(ReduceSinkOperator cRS, ParseContext context)
@@ -689,7 +734,7 @@ public class ReduceSinkDeDuplication imp
     }
   }
 
-  static class JoinReducerProc extends AbsctractReducerReducerProc {
+  static class JoinReducerProc extends AbstractReducerReducerProc {
 
     // pRS-pJOIN-cRS
     public Object process(ReduceSinkOperator cRS, ParseContext context)
@@ -717,7 +762,7 @@ public class ReduceSinkDeDuplication imp
     }
   }
 
-  static class ReducerReducerProc extends AbsctractReducerReducerProc {
+  static class ReducerReducerProc extends AbstractReducerReducerProc {
 
     // pRS-cRS
     public Object process(ReduceSinkOperator cRS, ParseContext context)

Modified: hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1489436&r1=1489435&r2=1489436&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue Jun  4
13:24:02 2013
@@ -2,12 +2,21 @@ set hive.optimize.reducededuplication=tr
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.map.aggr=true;
 
+-- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : wherther using this optimization
+-- hive.optimize.reducededuplication.min.reducer : number of reducer of deduped RS should
be this at least
+
+-- RS-mGBY-RS-rGBY
 explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key;
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group
by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by
key) Q1 group by key, (X + 1);
+-- mGBY-RS-rGBY-RS
 explain select key, sum(key) as value from src group by key order by key, value;
+-- RS-JOIN-mGBY-RS-rGBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value;
+-- RS-JOIN-RS
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value;
+-- mGBY-RS-rGBY-mGBY-RS-rGBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group
by key;
@@ -20,12 +29,17 @@ from (select key, value from src group b
 
 set hive.map.aggr=false;
 
+-- RS-RS-GBY
 explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key;
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group
by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by
key) Q1 group by key, (X + 1);
+-- RS-GBY-RS
 explain select key, sum(key) as value from src group by key order by key, value;
+-- RS-JOIN-RS-GBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value;
+-- RS-JOIN-RS
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value;
+-- RS-GBY-RS-GBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group
by key;
@@ -34,4 +48,4 @@ select key, sum(key), (X + 1) from (sele
 select key, sum(key) as value from src group by key order by key, value;
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
-from (select key, value from src group by key, value) s select s.key group by s.key;
\ No newline at end of file
+from (select key, value from src group by key, value) s select s.key group by s.key;

Modified: hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1489436&r1=1489435&r2=1489436&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue Jun
 4 13:24:02 2013
@@ -1,6 +1,16 @@
-PREHOOK: query: explain select key, sum(key) from (select * from src distribute by key sort
by key, value) Q1 group by key
+PREHOOK: query: -- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : wherther using this optimization
+-- hive.optimize.reducededuplication.min.reducer : number of reducer of deduped RS should
be this at least
+
+-- RS-mGBY-RS-rGBY
+explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, sum(key) from (select * from src distribute by key sort
by key, value) Q1 group by key
+POSTHOOK: query: -- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : wherther using this optimization
+-- hive.optimize.reducededuplication.min.reducer : number of reducer of deduped RS should
be this at least
+
+-- RS-mGBY-RS-rGBY
+explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))
(TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL
key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) Q1)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION
sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
@@ -242,9 +252,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select key, sum(key) as value from src group by key order by key,
value
+PREHOOK: query: -- mGBY-RS-rGBY-RS
+explain select key, sum(key) as value from src group by key order by key, value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, sum(key) as value from src group by key order by key,
value
+POSTHOOK: query: -- mGBY-RS-rGBY-RS
+explain select key, sum(key) as value from src group by key order by key, value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION
sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC
(TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))
@@ -317,9 +329,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key
group by src.key, src.value
+PREHOOK: query: -- RS-JOIN-mGBY-RS-rGBY
+explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key
group by src.key, src.value
+POSTHOOK: query: -- RS-JOIN-mGBY-RS-rGBY
+explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME
src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR
(TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src)
key) (. (TOK_TABLE_OR_COL src) value))))
@@ -447,9 +461,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key
order by src.key, src.value
+PREHOOK: query: -- RS-JOIN-RS
+explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key
order by src.key, src.value
+POSTHOOK: query: -- RS-JOIN-RS
+explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME
src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR
(. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL
src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value)))))
@@ -546,9 +562,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain from (select key, value from src group by key, value) s select s.key
group by s.key
+PREHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY
+explain from (select key, value from src group by key, value) s select s.key group by s.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain from (select key, value from src group by key, value) s select s.key
group by s.key
+POSTHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY
+explain from (select key, value from src group by key, value) s select s.key group by s.key
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL
key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL
value)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(. (TOK_TABLE_OR_COL s) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key))))
@@ -2289,9 +2307,11 @@ POSTHOOK: Input: default@src
 96
 97
 98
-PREHOOK: query: explain select key, sum(key) from (select * from src distribute by key sort
by key, value) Q1 group by key
+PREHOOK: query: -- RS-RS-GBY
+explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, sum(key) from (select * from src distribute by key sort
by key, value) Q1 group by key
+POSTHOOK: query: -- RS-RS-GBY
+explain select key, sum(key) from (select * from src distribute by key sort by key, value)
Q1 group by key
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))
(TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL
key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) Q1)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION
sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
@@ -2533,9 +2553,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select key, sum(key) as value from src group by key order by key,
value
+PREHOOK: query: -- RS-GBY-RS
+explain select key, sum(key) as value from src group by key order by key, value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, sum(key) as value from src group by key order by key,
value
+POSTHOOK: query: -- RS-GBY-RS
+explain select key, sum(key) as value from src group by key order by key, value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION
sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC
(TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))
@@ -2596,9 +2618,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key
group by src.key, src.value
+PREHOOK: query: -- RS-JOIN-RS-GBY
+explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key
group by src.key, src.value
+POSTHOOK: query: -- RS-JOIN-RS-GBY
+explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key,
src.value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME
src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR
(TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src)
key) (. (TOK_TABLE_OR_COL src) value))))
@@ -2712,9 +2736,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key
order by src.key, src.value
+PREHOOK: query: -- RS-JOIN-RS
+explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key
order by src.key, src.value
+POSTHOOK: query: -- RS-JOIN-RS
+explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key,
src.value
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME
src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR
(. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL
src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value)))))
@@ -2811,9 +2837,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: explain from (select key, value from src group by key, value) s select s.key
group by s.key
+PREHOOK: query: -- RS-GBY-RS-GBY
+explain from (select key, value from src group by key, value) s select s.key group by s.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain from (select key, value from src group by key, value) s select s.key
group by s.key
+POSTHOOK: query: -- RS-GBY-RS-GBY
+explain from (select key, value from src group by key, value) s select s.key group by s.key
 POSTHOOK: type: QUERY
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL
key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL
value)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(. (TOK_TABLE_OR_COL s) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key))))



Mime
View raw message