hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-15474: Extend limit propagation for chain of RS-GB-RS operators (Jesus Camacho Rodriguez, reviewed by Rui Li)
Date Fri, 06 Jan 2017 09:29:14 GMT
Repository: hive
Updated Branches:
  refs/heads/master 4e3a62071 -> aab5dd185


HIVE-15474: Extend limit propagation for chain of RS-GB-RS operators (Jesus Camacho Rodriguez, reviewed by Rui Li)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aab5dd18
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aab5dd18
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aab5dd18

Branch: refs/heads/master
Commit: aab5dd185dbf68daff57b5f774a6157f61c0874e
Parents: 4e3a620
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Tue Dec 20 20:37:14 2016 +0000
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Jan 6 09:29:10 2017 +0000

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    1 +
 .../ql/optimizer/LimitPushdownOptimizer.java    |   67 +-
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  |   69 ++
 .../queries/clientpositive/limit_pushdown2.q    |   18 +
 .../clientpositive/limit_pushdown2.q.out        |  232 ++++
 .../clientpositive/spark/limit_pushdown2.q.out  | 1141 ++++++++++++++++++
 6 files changed, 1494 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1cebc70..70e7197 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1156,6 +1156,7 @@ spark.query.files=add_part_multiple.q, \
   leftsemijoin_mr.q, \
   limit_partition_metadataonly.q, \
   limit_pushdown.q, \
+  limit_pushdown2.q, \
   list_bucket_dml_2.q, \
   load_dyn_part1.q, \
   load_dyn_part10.q, \

http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index f68d0ad..9bf197b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -179,15 +179,41 @@ public class LimitPushdownOptimizer extends Transform {
           // Not safe to continue for RS-GBY-GBY-LIM kind of pipelines. See HIVE-10607 for more.
           return false;
         }
-        if (!checkKeys(cRS.getConf().getKeyCols(), pRS.getConf().getKeyCols(), cRS, pRS)) {
-          // Keys are not the same; bail out
-          return false;
+        List<ExprNodeDesc> cKeys = cRS.getConf().getKeyCols();
+        List<ExprNodeDesc> pKeys = pRS.getConf().getKeyCols();
+        if (pRS.getChildren().get(0) instanceof GroupByOperator &&
+                pRS.getChildren().get(0).getChildren().get(0) == cRS) {
+          // RS-GB-RS
+          GroupByOperator gBy = (GroupByOperator) pRS.getChildren().get(0);
+          List<ExprNodeDesc> gKeys = gBy.getConf().getKeys();
+          if (!ExprNodeDescUtils.checkPrefixKeysUpstream(cKeys, pKeys, cRS, pRS)) {
+            // We might still be able to push the limit
+            if (!ExprNodeDescUtils.checkPrefixKeys(cKeys, gKeys, cRS, gBy) ||
+                    !ExprNodeDescUtils.checkPrefixKeys(gKeys, pKeys, gBy, pRS)) {
+              // We cannot push limit; bail out
+              return false;
+            }
+          }
+        } else {
+          if (!ExprNodeDescUtils.checkPrefixKeysUpstream(cKeys, pKeys, cRS, pRS)) {
+            // We cannot push limit; bail out
+            return false;
+          }
         }
         // Copy order
-        StringBuilder order = new StringBuilder(cRS.getConf().getOrder());
-        StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder());
-        order.append(pRS.getConf().getOrder().substring(order.length()));
-        orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+        StringBuilder order;
+        StringBuilder orderNull;
+        if (pRS.getConf().getOrder().length() > cRS.getConf().getOrder().length()) {
+          order = new StringBuilder(cRS.getConf().getOrder());
+          orderNull = new StringBuilder(cRS.getConf().getNullOrder());
+          order.append(pRS.getConf().getOrder().substring(order.length()));
+          orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+        } else {
+          order = new StringBuilder(cRS.getConf().getOrder().substring(
+                  0, pRS.getConf().getOrder().length()));
+          orderNull = new StringBuilder(cRS.getConf().getNullOrder().substring(
+                  0, pRS.getConf().getNullOrder().length()));
+        }
         pRS.getConf().setOrder(order.toString());
         pRS.getConf().setNullOrder(orderNull.toString());
         // Copy limit
@@ -201,33 +227,6 @@ public class LimitPushdownOptimizer extends Transform {
     }
   }
 
-  private static boolean checkKeys(List<ExprNodeDesc> cKeys, List<ExprNodeDesc> pKeys,
-      ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
-    if (cKeys == null || cKeys.isEmpty()) {
-      if (pKeys != null && !pKeys.isEmpty()) {
-        return false;
-      }
-      return true;
-    }
-    if (pKeys == null || pKeys.isEmpty()) {
-      return false;
-    }
-    if (cKeys.size() > pKeys.size()) {
-      return false;
-    }
-    for (int i = 0; i < cKeys.size(); i++) {
-      ExprNodeDesc expr = ExprNodeDescUtils.backtrack(cKeys.get(i), cRS, pRS);
-      if (expr == null) {
-        // cKey is not present in parent
-        return false;
-      }
-      if (!expr.isSame(pKeys.get(i))) {
-        return false;
-      }
-    }
-    return true;
-  }
-
   private static class LimitPushdownContext implements NodeProcessorCtx {
 
     private final float threshold;

http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 4c699a2..6c10704 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -713,4 +713,73 @@ public class ExprNodeDescUtils {
     // Otherwise, we return the expression
     return foldedExpr;
   }
+
+  /**
+   * Checks whether the keys of a parent operator are a prefix of the keys of a
+   * child operator.
+   * @param childKeys keys of the child operator
+   * @param parentKeys keys of the parent operator
+   * @param childOp child operator
+   * @param parentOp parent operator
+   * @return true if the keys are a prefix, false otherwise
+   * @throws SemanticException
+   */
+  public static boolean checkPrefixKeys(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys,
+      Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp)
+          throws SemanticException {
+    return checkPrefixKeys(childKeys, parentKeys, childOp, parentOp, false);
+  }
+
+  /**
+   * Checks whether the keys of a child operator are a prefix of the keys of a
+   * parent operator.
+   * @param childKeys keys of the child operator
+   * @param parentKeys keys of the parent operator
+   * @param childOp child operator
+   * @param parentOp parent operator
+   * @return true if the keys are a prefix, false otherwise
+   * @throws SemanticException
+   */
+  public static boolean checkPrefixKeysUpstream(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys,
+      Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp)
+          throws SemanticException {
+    return checkPrefixKeys(childKeys, parentKeys, childOp, parentOp, true);
+  }
+
+  private static boolean checkPrefixKeys(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys,
+      Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp,
+      boolean upstream) throws SemanticException {
+    if (childKeys == null || childKeys.isEmpty()) {
+      if (parentKeys != null && !parentKeys.isEmpty()) {
+        return false;
+      }
+      return true;
+    }
+    if (parentKeys == null || parentKeys.isEmpty()) {
+      return false;
+    }
+    int size;
+    if (upstream) {
+      if (childKeys.size() > parentKeys.size()) {
+        return false;
+      }
+      size = childKeys.size();
+    } else {
+      if (parentKeys.size() > childKeys.size()) {
+        return false;
+      }
+      size = parentKeys.size();
+    }
+    for (int i = 0; i < size; i++) {
+      ExprNodeDesc expr = ExprNodeDescUtils.backtrack(childKeys.get(i), childOp, parentOp);
+      if (expr == null) {
+        // cKey is not present in parent
+        return false;
+      }
+      if (!expr.isSame(parentKeys.get(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/queries/clientpositive/limit_pushdown2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_pushdown2.q b/ql/src/test/queries/clientpositive/limit_pushdown2.q
index e222763..1f00182 100644
--- a/ql/src/test/queries/clientpositive/limit_pushdown2.q
+++ b/ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -57,6 +57,24 @@ select key, value, avg(key + 1) from src
 group by value, key
 order by key desc limit 20;
 
+explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20;
+
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20;
+
+explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20;
+
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20;
+
 -- NOT APPLICABLE
 explain
 select value, avg(key + 1) myavg from src

http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/results/clientpositive/limit_pushdown2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
index b44b529..fac6164 100644
--- a/ql/src/test/results/clientpositive/limit_pushdown2.q.out
+++ b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
@@ -562,6 +562,238 @@ POSTHOOK: Input: default@src
 76	val_76	77.0
 74	val_74	75.0
 72	val_72	73.0
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+              sort order: +++
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	3
+10	val_10	1
+100	val_100	2
+103	val_103	2
+104	val_104	2
+105	val_105	1
+11	val_11	1
+111	val_111	1
+113	val_113	2
+114	val_114	1
+116	val_116	1
+118	val_118	2
+119	val_119	3
+12	val_12	2
+120	val_120	2
+125	val_125	2
+126	val_126	1
+128	val_128	3
+129	val_129	2
+131	val_131	1
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: -+
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+              sort order: -++
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	2
+97	val_97	2
+96	val_96	1
+95	val_95	2
+92	val_92	1
+90	val_90	3
+9	val_9	1
+87	val_87	1
+86	val_86	1
+85	val_85	1
+84	val_84	2
+83	val_83	2
+82	val_82	1
+80	val_80	1
+8	val_8	1
+78	val_78	1
+77	val_77	1
+76	val_76	2
+74	val_74	1
+72	val_72	2
 PREHOOK: query: -- NOT APPLICABLE
 explain
 select value, avg(key + 1) myavg from src

http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
new file mode 100644
index 0000000..8b7c96d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
@@ -0,0 +1,1141 @@
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: +-
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: -+
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	99.0
+97	val_97	98.0
+96	val_96	97.0
+95	val_95	96.0
+92	val_92	93.0
+90	val_90	91.0
+9	val_9	10.0
+87	val_87	88.0
+86	val_86	87.0
+85	val_85	86.0
+84	val_84	85.0
+83	val_83	84.0
+82	val_82	83.0
+80	val_80	81.0
+8	val_8	9.0
+78	val_78	79.0
+77	val_77	78.0
+76	val_76	77.0
+74	val_74	75.0
+72	val_72	73.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col1 (type: string), _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col1 (type: string), _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: -+
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	99.0
+97	val_97	98.0
+96	val_96	97.0
+95	val_95	96.0
+92	val_92	93.0
+90	val_90	91.0
+9	val_9	10.0
+87	val_87	88.0
+86	val_86	87.0
+85	val_85	86.0
+84	val_84	85.0
+83	val_83	84.0
+82	val_82	83.0
+80	val_80	81.0
+8	val_8	9.0
+78	val_78	79.0
+77	val_77	78.0
+76	val_76	77.0
+74	val_74	75.0
+72	val_72	73.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col1 (type: string), _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: -+
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	99.0
+97	val_97	98.0
+96	val_96	97.0
+95	val_95	96.0
+92	val_92	93.0
+90	val_90	91.0
+9	val_9	10.0
+87	val_87	88.0
+86	val_86	87.0
+85	val_85	86.0
+84	val_84	85.0
+83	val_83	84.0
+82	val_82	83.0
+80	val_80	81.0
+8	val_8	9.0
+78	val_78	79.0
+77	val_77	78.0
+76	val_76	77.0
+74	val_74	75.0
+72	val_72	73.0
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                  sort order: +++
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	3
+10	val_10	1
+100	val_100	2
+103	val_103	2
+104	val_104	2
+105	val_105	1
+11	val_11	1
+111	val_111	1
+113	val_113	2
+114	val_114	1
+116	val_116	1
+118	val_118	2
+119	val_119	3
+12	val_12	2
+120	val_120	2
+125	val_125	2
+126	val_126	1
+128	val_128	3
+129	val_129	2
+131	val_131	1
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: -+
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                  sort order: -++
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src 
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	2
+97	val_97	2
+96	val_96	1
+95	val_95	2
+92	val_92	1
+90	val_90	3
+9	val_9	1
+87	val_87	1
+86	val_86	1
+85	val_85	1
+84	val_84	2
+83	val_83	2
+82	val_82	1
+80	val_80	1
+8	val_8	1
+78	val_78	1
+77	val_77	1
+76	val_76	2
+74	val_74	1
+72	val_72	2
+PREHOOK: query: -- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: double), _col0 (type: string)
+                  sort order: +-
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0	1.0
+val_2	3.0
+val_4	5.0
+val_5	6.0
+val_8	9.0
+val_9	10.0
+val_10	11.0
+val_11	12.0
+val_12	13.0
+val_15	16.0
+val_17	18.0
+val_18	19.0
+val_19	20.0
+val_20	21.0
+val_24	25.0
+val_26	27.0
+val_27	28.0
+val_28	29.0
+val_30	31.0
+val_33	34.0
+PREHOOK: query: -- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.3
+                    value expressions: _col2 (type: double)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by rollup(value, key)
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by rollup(value, key)
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(_col2)
+                      keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.3
+                    value expressions: _col2 (type: double)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 20
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL	NULL	261.182
+NULL	val_0	1.0
+NULL	val_10	11.0
+NULL	val_100	101.0
+NULL	val_103	104.0
+NULL	val_104	105.0
+NULL	val_105	106.0
+NULL	val_11	12.0
+NULL	val_111	112.0
+NULL	val_113	114.0
+NULL	val_114	115.0
+NULL	val_116	117.0
+NULL	val_118	119.0
+NULL	val_119	120.0
+NULL	val_12	13.0
+NULL	val_120	121.0
+NULL	val_125	126.0
+NULL	val_126	127.0
+NULL	val_128	129.0
+NULL	val_129	130.0


Mime
View raw message