hive-commits mailing list archives

From kevinwilf...@apache.org
Subject svn commit: r1424292 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Thu, 20 Dec 2012 04:33:22 GMT
Author: kevinwilfong
Date: Thu Dec 20 04:33:21 2012
New Revision: 1424292

URL: http://svn.apache.org/viewvc?rev=1424292&view=rev
Log:
HIVE-3728. make optimizing multi-group by configurable. (njain via kevinwilfong)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Dec 20 04:33:21 2012
@@ -392,6 +392,8 @@ public class HiveConf extends Configurat
     HIVEALIAS("hive.alias", ""),
     HIVEMAPSIDEAGGREGATE("hive.map.aggr", true),
     HIVEGROUPBYSKEW("hive.groupby.skewindata", false),
+    HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts",
+        true),
     HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
     HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
     HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Thu Dec 20 04:33:21 2012
@@ -366,6 +366,25 @@
 </property>
 
 <property>
+  <name>hive.optimize.multigroupby.common.distincts</name>
+  <value>true</value>
+  <description>Whether to optimize a multi-groupby query with the same distinct.
+    Consider a query like:
+
+      from src
+        insert overwrite table dest1 select col1, count(distinct colx) group by col1
+        insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+
+    With this parameter set to true, first we spray by the distinct value (colx), and then
+    perform the 2 group bys. This makes sense if map-side aggregation is turned off. However,
+    with map-side aggregation, it might be useful in some cases to treat the 2 inserts independently,
+    thereby performing the query above in 2 MR jobs instead of 3 (due to spraying by the distinct key first).
+    If this parameter is turned off, we don't consider the fact that the distinct key is the same across
+    different MR jobs.
+  </description>
+</property>
+
+<property>
   <name>hive.groupby.mapaggr.checkinterval</name>
   <value>100000</value>
   <description>Number of rows after which size of the grouping keys/aggregation classes is performed</description>
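
The new property can also be toggled per session from the Hive CLI, which is how the test added later in this commit exercises both code paths. A minimal sketch (col1, col2 and colx are the placeholder columns from the description above, not real src columns):

  -- default: spray by the common distinct key first (3 MR jobs for the query below)
  set hive.optimize.multigroupby.common.distincts=true;

  -- plan each insert independently instead (2 MR jobs, useful with map-side aggregation)
  set hive.optimize.multigroupby.common.distincts=false;
  from src
    insert overwrite table dest1 select col1, count(distinct colx) group by col1
    insert overwrite table dest2 select col2, count(distinct colx) group by col2;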

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Dec 20 04:33:21 2012
@@ -6707,7 +6707,20 @@ public class SemanticAnalyzer extends Ba
     // currently. It doesnt matter whether he has asked to do
     // map-side aggregation or not. Map side aggregation is turned off
     List<ASTNode> commonDistinctExprs = getCommonDistinctExprs(qb, input);
-    boolean optimizeMultiGroupBy = commonDistinctExprs != null;
+
+    // Consider a query like:
+    //
+    //  from src
+    //    insert overwrite table dest1 select col1, count(distinct colx) group by col1
+    //    insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+    //
+    // With HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS set to true, first we spray by the distinct
+    // value (colx), and then perform the 2 group bys. This makes sense if map-side aggregation is
+    // turned off. However, with map-side aggregation, it might be useful in some cases to treat
+    // the 2 inserts independently, thereby performing the query above in 2 MR jobs instead of 3
+    // (due to spraying by the distinct key first).
+    boolean optimizeMultiGroupBy = commonDistinctExprs != null &&
+        conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS);
 
     Operator curr = input;
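
Note that the new flag is only consulted when getCommonDistinctExprs returns a non-null result, i.e. when the inserts share a common distinct expression; otherwise optimizeMultiGroupBy stays false regardless of the setting. A sketch for illustration (col1, col2, colx and coly are placeholder columns in the spirit of the comment above, not actual src columns):

  -- both inserts aggregate count(distinct colx), so the flag decides the plan shape
  from src
    insert overwrite table dest1 select col1, count(distinct colx) group by col1
    insert overwrite table dest2 select col2, count(distinct colx) group by col2;

  -- the distinct columns differ, so this query is planned the same way with the flag on or off
  from src
    insert overwrite table dest1 select col1, count(distinct colx) group by col1
    insert overwrite table dest2 select col2, count(distinct coly) group by col2;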
 

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q?rev=1424292&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q Thu Dec 20 04:33:21 2012
@@ -0,0 +1,32 @@
+set hive.map.aggr=true;
+
+create table dest1(key int, cnt int);
+create table dest2(key int, cnt int);
+
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+
+select * from dest1 where key < 10 order by key;
+select * from dest2 where key < 20 order by key limit 10;
+
+set hive.optimize.multigroupby.common.distincts=false;
+
+-- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+select * from dest1 where key < 10 order by key;
+select * from dest2 where key < 20 order by key limit 10;

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out?rev=1424292&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out Thu Dec 20 04:33:21 2012
@@ -0,0 +1,532 @@
+PREHOOK: query: create table dest1(key int, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest1(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: create table dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest2
+PREHOOK: query: explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-6 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Reduce Output Operator
+              key expressions:
+                    expr: value
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: value
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: (key + key)
+                    type: double
+      Reduce Operator Tree:
+        Forward
+          Group By Operator
+            aggregations:
+                  expr: count(DISTINCT KEY._col0)
+            bucketGroup: false
+            keys:
+                  expr: VALUE._col0
+                  type: string
+            mode: hash
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+          Group By Operator
+            aggregations:
+                  expr: count(DISTINCT KEY._col0)
+            bucketGroup: false
+            keys:
+                  expr: VALUE._col1
+                  type: double
+            mode: hash
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: double
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: double
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: double
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: double
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 2
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest2
+
+  Stage: Stage-6
+    Stats-Aggr Operator
+
+
+PREHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from dest1 where key < 10 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1 where key < 10 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0	1
+2	1
+4	1
+5	1
+8	1
+9	1
+PREHOOK: query: select * from dest2 where key < 20 order by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0	1
+4	1
+8	1
+10	1
+16	1
+18	1
+PREHOOK: query: -- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations:
+                      expr: count(DISTINCT value)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col2
+                        type: bigint
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations:
+                      expr: count(DISTINCT value)
+                bucketGroup: false
+                keys:
+                      expr: (key + key)
+                      type: double
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: double
+                    expr: _col1
+                    type: string
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: double
+              tag: -1
+              value expressions:
+                    expr: _col2
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: double
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: double
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+                    expr: UDFToInteger(_col1)
+                    type: int
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 2
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest2
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+
+PREHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from dest1 where key < 10 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1 where key < 10 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0	1
+2	1
+4	1
+5	1
+8	1
+9	1
+PREHOOK: query: select * from dest2 where key < 20 order by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0	1
+4	1
+8	1
+10	1
+16	1
+18	1


