hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject hive git commit: HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)
Date Thu, 03 Sep 2015 17:32:08 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 1666aeb84 -> 3bbfbc377


HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bbfbc37
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bbfbc37
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bbfbc37

Branch: refs/heads/branch-1
Commit: 3bbfbc37710ea03c22d511f7ff60e5d10fdff2a8
Parents: 1666aeb
Author: Jimmy Xiang <jxiang@cloudera.com>
Authored: Tue Sep 1 11:48:36 2015 -0700
Committer: Jimmy Xiang <jxiang@cloudera.com>
Committed: Thu Sep 3 10:26:10 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   4 +
 .../queries/clientpositive/groupby1_map_nomap.q |   2 +
 ql/src/test/queries/clientpositive/groupby6.q   |   2 +
 .../clientpositive/groupby_grouping_id2.q       |   2 +
 .../clientpositive/groupby_ppr_multi_distinct.q |   2 +
 ql/src/test/queries/clientpositive/having2.q    |  27 ++
 .../clientpositive/groupby1_map_nomap.q.out     |   8 +-
 .../test/results/clientpositive/groupby6.q.out  |   8 +-
 .../clientpositive/groupby_duplicate_key.q.out  |  16 +-
 .../clientpositive/groupby_grouping_id2.q.out   |  28 +-
 .../groupby_ppr_multi_distinct.q.out            |   8 +-
 .../test/results/clientpositive/having2.q.out   | 345 +++++++++++++++++++
 12 files changed, 426 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2de9397..d51f7f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -4631,6 +4631,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
           groupByInputRowResolver);
 
+      if (ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
+        // Skip duplicated grouping keys
+        continue;
+      }
       groupByKeys.add(grpByExprNode);
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
index eb09a9c..b22a61e 100644
--- a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
+++ b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
@@ -2,6 +2,8 @@ set hive.map.aggr=true;
 set hive.groupby.skewindata=false;
 set hive.groupby.mapaggr.checkinterval=20;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
 
 EXPLAIN

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/queries/clientpositive/groupby6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby6.q b/ql/src/test/queries/clientpositive/groupby6.q
index 3a3cc58..17597cb 100755
--- a/ql/src/test/queries/clientpositive/groupby6.q
+++ b/ql/src/test/queries/clientpositive/groupby6.q
@@ -1,6 +1,8 @@
 set hive.map.aggr=false;
 set hive.groupby.skewindata=true;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE;
 
 EXPLAIN

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index f451f17..5c05aad 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -4,6 +4,8 @@ LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE
T1;
 
 set hive.groupby.skewindata = true;
 
+-- SORT_QUERY_RESULTS
+
 SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
 
 SELECT GROUPING__ID, count(*)

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
index 20c73bd..1249853 100644
--- a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
+++ b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
@@ -1,6 +1,8 @@
 set hive.map.aggr=false;
 set hive.groupby.skewindata=false;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
 
 EXPLAIN EXTENDED

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/queries/clientpositive/having2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/having2.q b/ql/src/test/queries/clientpositive/having2.q
index 282b2c0..83ae1e1 100644
--- a/ql/src/test/queries/clientpositive/having2.q
+++ b/ql/src/test/queries/clientpositive/having2.q
@@ -63,3 +63,30 @@ SELECT customer_name, SUM(customer_balance), SUM(order_quantity) FROM default.te
 (SUM(customer_balance) <= 4074689.000000041)
 AND (COUNT(s1.discount) <= 822)
 );
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
index cc985a5..7cdf240 100644
--- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
index b790224..d8cb2ac 100644
--- a/ql/src/test/results/clientpositive/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/groupby6.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
index 2f2a6e6..fc95f41 100644
--- a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
+++ b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
@@ -21,14 +21,14 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
               Group By Operator
-                keys: key (type: string), '' (type: string), '' (type: string)
+                keys: key (type: string), '' (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column
stats: NONE
       Reduce Operator Tree:
         Group By Operator
@@ -99,14 +99,14 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
               Group By Operator
-                keys: key (type: string), 'X' (type: string), 'X' (type: string)
+                keys: key (type: string), 'X' (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column
stats: NONE
       Reduce Operator Tree:
         Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
index 4a0a9d2..544a7ae 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
@@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@t1
-PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH
ROLLUP
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH
ROLLUP
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-NULL	NULL	0	6
+1	1	3	1
 1	NULL	1	2
 1	NULL	3	1
-1	1	3	1
-2	NULL	1	1
 2	2	3	1
+2	NULL	1	1
+3	3	3	1
 3	NULL	1	2
 3	NULL	3	1
-3	3	3	1
-4	NULL	1	1
 4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (
@@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP
BY key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-NULL	NULL	0	6
+1	1	3	1
 1	NULL	1	2
 1	NULL	3	1
-1	1	3	1
-2	NULL	1	1
 2	2	3	1
+2	NULL	1	1
+3	3	3	1
 3	NULL	1	2
 3	NULL	3	1
-3	3	3	1
-4	NULL	1	1
 4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index c50abde..6eb3f66 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED
AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED
AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbfbc37/ql/src/test/results/clientpositive/having2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out
index aafd3b6..7dac8ea 100644
--- a/ql/src/test/results/clientpositive/having2.q.out
+++ b/ql/src/test/results/clientpositive/having2.q.out
@@ -242,3 +242,348 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double),
_col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>),
_col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (((_col1 <= 4074689.000000041) and (_col2 <= 822.0)) and (_col3
> 4)) (type: boolean)
+            Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name
= s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double),
_col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>),
_col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string), KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4
> 4)) (type: boolean)
+            Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double),
_col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats:
NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>),
_col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string), KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4
> 4)) (type: boolean)
+            Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col1 (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


Mime
View raw message