hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1426165 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/ ql/src...
Date Thu, 27 Dec 2012 11:23:21 GMT
Author: namit
Date: Thu Dec 27 11:23:20 2012
New Revision: 1426165

URL: http://svn.apache.org/viewvc?rev=1426165&view=rev
Log:
HIVE-581 improve group by syntax
(Zhenxiao Luo via namit)


Added:
    hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q
    hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q
    hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q
    hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out
    hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out
    hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Dec 27 11:23:20 2012
@@ -406,6 +406,7 @@ public class HiveConf extends Configurat
     HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
     HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true),
     HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false),
+    HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false),
 
     // for hive udtf operator
     HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Thu Dec 27 11:23:20 2012
@@ -1626,5 +1626,11 @@
   If the property is not set, then logging will be initialized using hive-exec-log4j.properties found on the classpath.
   If the property is set, the value must be a valid URI (java.net.URI, e.g. "file:///tmp/my-logging.properties"), which you can then extract a URL from and pass to PropertyConfigurator.configure(URL).</description>
 </property>
+
+<property>
+  <name>hive.groupby.orderby.position.alias</name>
+  <value>false</value>
+  <description>Whether to enable using Column Position Alias in Group By or Order By</description>
+</property>
 </configuration>
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Thu Dec 27 11:23:20 2012
@@ -302,6 +302,12 @@ public enum ErrorMsg {
     "ALTER VIEW AS SELECT if the view has paritions\n"),
   EXISTING_TABLE_IS_NOT_VIEW(10218,
     "Existing table is not a view\n"),
+  NO_SUPPORTED_ORDERBY_ALLCOLREF_POS(10219,
+    "Position in ORDER BY is not supported when using SELECT *"),
+  INVALID_POSITION_ALIAS_IN_GROUPBY(10220,
+    "Invalid position alias in Group By\n"),
+  INVALID_POSITION_ALIAS_IN_ORDERBY(10221,
+    "Invalid position alias in Order By\n"),
 
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Dec 27 11:23:20 2012
@@ -33,6 +33,7 @@ import java.util.TreeSet;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
+import org.antlr.runtime.tree.BaseTree;
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.ContentSummary;
@@ -8200,6 +8201,9 @@ public class SemanticAnalyzer extends Ba
 
     LOG.info("Starting Semantic Analysis");
 
+    // analyze and process the position alias
+    processPositionAlias(ast);
+
     // analyze create table command
     if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
       // if it is not CTAS, we don't need to go further and just return
@@ -9134,6 +9138,98 @@ public class SemanticAnalyzer extends Ba
     return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
   }
 
+  // Process the position alias in GROUPBY and ORDERBY
+  private void processPositionAlias(ASTNode ast) throws SemanticException {
+    if (HiveConf.getBoolVar(conf,
+          HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS) == false) {
+      return;
+    }
+
+    if (ast.getChildCount()  == 0) {
+      return;
+    }
+
+    boolean isAllCol;
+    ASTNode selectNode = null;
+    ASTNode groupbyNode = null;
+    ASTNode orderbyNode = null;
+
+    // get node type
+    int child_count = ast.getChildCount();
+    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+      ASTNode node = (ASTNode) ast.getChild(child_pos);
+      int type = node.getToken().getType();
+      if (type == HiveParser.TOK_SELECT) {
+        selectNode = node;
+      } else if (type == HiveParser.TOK_GROUPBY) {
+        groupbyNode = node;
+      } else if (type == HiveParser.TOK_ORDERBY) {
+        orderbyNode = node;
+      }
+    }
+
+    if (selectNode != null) {
+      int selectExpCnt = selectNode.getChildCount();
+
+      // replace each of the position alias in GROUPBY with the actual column name
+      if (groupbyNode != null) {
+        for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) {
+          ASTNode node = (ASTNode) groupbyNode.getChild(child_pos);
+          if (node.getToken().getType() == HiveParser.Number) {
+            int pos = Integer.parseInt(node.getText());
+            if (pos > 0 && pos <= selectExpCnt) {
+              groupbyNode.setChild(child_pos,
+                (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+            } else {
+              throw new SemanticException(
+                ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg(
+                "Position alias: " + pos + " does not exist\n" +
+                "The Select List is indexed from 1 to " + selectExpCnt));
+            }
+          }
+        }
+      }
+
+      // replace each of the position alias in ORDERBY with the actual column name
+      if (orderbyNode != null) {
+        isAllCol = false;
+        for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
+          ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
+          if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+            isAllCol = true;
+          }
+        }
+        for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
+          ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos);
+          ASTNode node = (ASTNode) colNode.getChild(0);
+          if (node.getToken().getType() == HiveParser.Number) {
+            if (!isAllCol) {
+              int pos = Integer.parseInt(node.getText());
+              if (pos > 0 && pos <= selectExpCnt) {
+                colNode.setChild(0, (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+              } else {
+                throw new SemanticException(
+                  ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
+                  "Position alias: " + pos + " does not exist\n" +
+                  "The Select List is indexed from 1 to " + selectExpCnt));
+              }
+            } else {
+              throw new SemanticException(
+                ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
+            }
+          }
+        }
+      }
+    }
+
+    // Recursively process through the children ASTNodes
+    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+      processPositionAlias((ASTNode) ast.getChild(child_pos));
+    }
+    return;
+  }
+
+
   public QB getQB() {
     return qb;
   }

Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in group by
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY 3;

Added: hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in order by
+SELECT src.key, src.value FROM src ORDER BY 0;

Added: hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- position alias is not supported when SELECT *
+SELECT src.* FROM src ORDER BY 1;

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,72 @@
+set hive.groupby.orderby.position.alias=true;
+
+CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE;
+CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE;
+
+-- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+-- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC;
+
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC;
+
+EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC;
+
+SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC;

Added: hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10220]: Invalid position alias in Group By
+ Position alias: 3 does not exist
+The Select List is indexed from 1 to 2

Added: hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10221]: Invalid position alias in Order By
+ Position alias: 0 does not exist
+The Select List is indexed from 1 to 2

Added: hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10219]: Position in ORDER BY is not supported when using SELECT *

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,1120 @@
+PREHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testTable1
+PREHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testTable2
+PREHOOK: query: -- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 20.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT substr(value, 5))
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                        expr: substr(value, 5)
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+            Filter Operator
+              predicate:
+                  expr: (key < 20.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT substr(value, 5))
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                        expr: substr(value, 5)
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.testtable1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.testtable1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: string
+              sort order: +++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col3
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col2:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: bigint
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 2
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.testtable2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.testtable2
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testtable1
+PREHOOK: Output: default@testtable2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testtable1
+POSTHOOK: Output: default@testtable2
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	1
+2	1
+4	1
+5	1
+8	1
+9	1
+10	1
+11	1
+12	1
+15	1
+17	1
+18	1
+19	1
+PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	val_0	1
+2	val_2	1
+4	val_4	1
+5	val_5	1
+8	val_8	1
+9	val_9	1
+10	val_10	1
+11	val_11	1
+12	val_12	1
+15	val_15	1
+17	val_17	1
+18	val_18	1
+19	val_19	1
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 20.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT substr(value, 5))
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                        expr: substr(value, 5)
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+            Filter Operator
+              predicate:
+                  expr: (key < 20.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: value
+                      type: string
+                      expr: key
+                      type: string
+                outputColumnNames: value, key
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT substr(value, 5))
+                  bucketGroup: false
+                  keys:
+                        expr: value
+                        type: string
+                        expr: key
+                        type: string
+                        expr: substr(value, 5)
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.testtable1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.testtable1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: string
+              sort order: +++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col3
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col2:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: string
+                  expr: _col0
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: bigint
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 2
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.testtable2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.testtable2
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testtable1
+PREHOOK: Output: default@testtable2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testtable1
+POSTHOOK: Output: default@testtable2
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	1
+2	1
+4	1
+5	1
+8	1
+9	1
+10	1
+11	1
+12	1
+15	1
+17	1
+18	1
+19	1
+PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	val_0	1
+2	val_2	1
+4	val_4	1
+5	val_5	1
+8	val_8	1
+9	val_9	1
+10	val_10	1
+11	val_11	1
+12	val_12	1
+15	val_15	1
+17	val_17	1
+18	val_18	1
+19	val_19	1
+PREHOOK: query: -- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL b) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (. (TOK_TABLE_OR_COL t) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (key <= 20.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                outputColumnNames: key
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col1
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col1
+                    type: bigint
+                    expr: _col0
+                    type: string
+              sort order: -+
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	3
+5	3
+12	2
+15	2
+18	2
+10	1
+11	1
+17	1
+19	1
+2	1
+20	1
+4	1
+8	1
+9	1
+PREHOOK: query: EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) c2) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL src1) value) 5)) c3)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src1) key) 20))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src1) value)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) c4)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src2) key) 15) (< (. (TOK_TABLE_OR_COL src2) key) 25))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src2
 ) value)))) b) (= (. (TOK_TABLE_OR_COL a) c1) (. (TOK_TABLE_OR_COL b) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c1) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c2) c2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c3) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c4) c4)))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_TABLE_OR_COL c2)) (TOK_SELEXPR (TOK_TABLE_OR_COL c3)) (TOK_SELEXPR (TOK_TABLE_OR_COL c4))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c1)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c3)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c4)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 depends on stages: Stage-2
+  Stage-4 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        c:a:src1 
+          TableScan
+            alias: src1
+            Filter Operator
+              predicate:
+                  expr: ((key > 10.0) and (key < 20.0))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT substr(value, 5))
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                        expr: substr(value, 5)
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                          expr: _col2
+                          type: string
+                    sort order: +++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col3
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col2:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+        $INTNAME1 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col3, _col4
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col3
+                  type: string
+                  expr: _col4
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: string
+                    expr: _col3
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: string
+                    expr: _col3
+                    type: string
+              sort order: --++
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: string
+                    expr: _col3
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        c:b:src2 
+          TableScan
+            alias: src2
+            Filter Operator
+              predicate:
+                  expr: ((key > 15.0) and (key < 25.0))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Group By Operator
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c1, c2, c3, c4
+FROM (
+ FROM 
+  (
+  FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+  ) a
+ JOIN 
+ (
+  FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b 
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+19	val_19	19	val_19
+18	val_18	18	val_18
+17	val_17	17	val_17



Mime
View raw message