hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pxi...@apache.org
Subject hive git commit: HIVE-15685: count(distinct) generates different result than expected (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Date Sat, 21 Jan 2017 18:17:28 GMT
Repository: hive
Updated Branches:
  refs/heads/master d9343f6d6 -> f8b6ebb20


HIVE-15685: count(distinct) generates different result than expected (Pengcheng Xiong, reviewed
by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b6ebb2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b6ebb2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b6ebb2

Branch: refs/heads/master
Commit: f8b6ebb20517079c988a4fa043b6f3d33f1b973b
Parents: d9343f6
Author: Pengcheng Xiong <pxiong@apache.org>
Authored: Sat Jan 21 10:01:30 2017 -0800
Committer: Pengcheng Xiong <pxiong@apache.org>
Committed: Sat Jan 21 10:01:30 2017 -0800

----------------------------------------------------------------------
 .../test/results/negative/cascade_dbdrop.q.out  |  24 +-
 .../generatehfiles_require_family_path.q.out    |   8 +-
 .../test/resources/testconfiguration.properties |   3 +-
 .../rules/HiveExpandDistinctAggregatesRule.java |  29 +-
 .../clientpositive/multi_count_distinct_null.q  |  45 +++
 .../llap/multi_count_distinct_null.q.out        | 312 +++++++++++++++++++
 6 files changed, 384 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/hbase-handler/src/test/results/negative/cascade_dbdrop.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/negative/cascade_dbdrop.q.out b/hbase-handler/src/test/results/negative/cascade_dbdrop.q.out
index f80f4a2..cef7a06 100644
--- a/hbase-handler/src/test/results/negative/cascade_dbdrop.q.out
+++ b/hbase-handler/src/test/results/negative/cascade_dbdrop.q.out
@@ -4,34 +4,14 @@ PREHOOK: Output: database:hbaseDB
 POSTHOOK: query: CREATE DATABASE hbaseDB
 POSTHOOK: type: CREATEDATABASE
 POSTHOOK: Output: database:hbaseDB
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
--- Hadoop 0.23 changes the behavior FsShell on Exit Codes
--- In Hadoop 0.20
--- Exit Code == 0 on success
--- Exit code < 0 on any failure
--- In Hadoop 0.23
--- Exit Code == 0 on success
--- Exit Code < 0 on syntax/usage error
--- Exit Code > 0 operation failed
-
-CREATE TABLE hbaseDB.hbase_table_0(key int, value string)
+PREHOOK: query: CREATE TABLE hbaseDB.hbase_table_0(key int, value string)
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
 TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:hbasedb
 PREHOOK: Output: hbaseDB@hbase_table_0
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
--- Hadoop 0.23 changes the behavior FsShell on Exit Codes
--- In Hadoop 0.20
--- Exit Code == 0 on success
--- Exit code < 0 on any failure
--- In Hadoop 0.23
--- Exit Code == 0 on success
--- Exit Code < 0 on syntax/usage error
--- Exit Code > 0 operation failed
-
-CREATE TABLE hbaseDB.hbase_table_0(key int, value string)
+POSTHOOK: query: CREATE TABLE hbaseDB.hbase_table_0(key int, value string)
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
 TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/hbase-handler/src/test/results/negative/generatehfiles_require_family_path.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/negative/generatehfiles_require_family_path.q.out
b/hbase-handler/src/test/results/negative/generatehfiles_require_family_path.q.out
index 487dd66..52ac38f 100644
--- a/hbase-handler/src/test/results/negative/generatehfiles_require_family_path.q.out
+++ b/hbase-handler/src/test/results/negative/generatehfiles_require_family_path.q.out
@@ -1,10 +1,6 @@
-PREHOOK: query: -- -*- mode:sql -*-
-
-DROP TABLE IF EXISTS hbase_bulk
+PREHOOK: query: DROP TABLE IF EXISTS hbase_bulk
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- -*- mode:sql -*-
-
-DROP TABLE IF EXISTS hbase_bulk
+POSTHOOK: query: DROP TABLE IF EXISTS hbase_bulk
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: CREATE TABLE hbase_bulk (key INT, value STRING)
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0a52782..63f8540 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -437,7 +437,8 @@ minillap.query.files=acid_bucket_pruning.q,\
   global_limit.q,\
   dynamic_partition_pruning_2.q,\
   tez_union_dynamic_partition.q,\
-  load_fs2.q
+  load_fs2.q,\
+  multi_count_distinct_null.q
 
 minillaplocal.query.files=acid_globallimit.q,\
   acid_vectorization_missing_cols.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
index 417eda3..82624fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
@@ -139,6 +139,7 @@ public final class HiveExpandDistinctAggregatesRule extends RelOptRule
{
 
     if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size()
         && aggregate.getGroupSet().isEmpty()) {
+      LOG.debug("Trigger countDistinct rewrite. numCountDistinct is " + numCountDistinct);
       // now positions contains all the distinct positions, i.e., $5, $4, $6
       // we need to first sort them as group by set
       // and then get their position later, i.e., $4->1, $5->2, $6->3
@@ -186,9 +187,12 @@ public final class HiveExpandDistinctAggregatesRule extends RelOptRule
{
    * Converts an aggregate relational expression that contains only
    * count(distinct) to grouping sets with count. For example select
    * count(distinct department_id), count(distinct gender), count(distinct
-   * education_level) from employee; can be transformed to select count(case i
-   * when 1 then 1 else null end) as c0, count(case i when 2 then 1 else null
-   * end) as c1, count(case i when 4 then 1 else null end) as c2 from (select
+   * education_level) from employee; can be transformed to 
+   * select 
+   * count(case when i=1 and department_id is not null then 1 else null end) as c0, 
+   * count(case when i=2 and gender is not null then 1 else null end) as c1, 
+   * count(case when i=4 and education_level is not null then 1 else null end) as c2 
+   * from (select
    * grouping__id as i, department_id, gender, education_level from employee
    * group by department_id, gender, education_level grouping sets
    * (department_id, gender, education_level))subq;
@@ -230,13 +234,22 @@ public final class HiveExpandDistinctAggregatesRule extends RelOptRule
{
           }
         });
     final List<RexNode> gbChildProjLst = Lists.newArrayList();
+    // for singular arg, count should not include null
+    // e.g., count(case when i=1 and department_id is not null then 1 else null end) as c0,

+    // for non-singular args, count can include null, i.e. (,) is counted as 1
     for (List<Integer> list : cleanArgList) {
-      RexNode equal = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
-          originalInputRefs.get(originalInputRefs.size() - 1),
-          rexBuilder.makeExactLiteral(new BigDecimal(getGroupingIdValue(list, sourceOfForCountDistinct))));
-      RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.CASE, equal,
+      RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, originalInputRefs
+          .get(originalInputRefs.size() - 1), rexBuilder.makeExactLiteral(new BigDecimal(
+          getGroupingIdValue(list, sourceOfForCountDistinct))));
+      if (list.size() == 1) {
+        int pos = list.get(0);
+        RexNode notNull = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
+            originalInputRefs.get(pos));
+        condition = rexBuilder.makeCall(SqlStdOperatorTable.AND, condition, notNull);
+      }
+      RexNode when = rexBuilder.makeCall(SqlStdOperatorTable.CASE, condition,
           rexBuilder.makeExactLiteral(BigDecimal.ONE), rexBuilder.constantNull());
-      gbChildProjLst.add(condition);
+      gbChildProjLst.add(when);
     }
 
     // create the project before GB

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/ql/src/test/queries/clientpositive/multi_count_distinct_null.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_count_distinct_null.q b/ql/src/test/queries/clientpositive/multi_count_distinct_null.q
new file mode 100644
index 0000000..442d855
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_count_distinct_null.q
@@ -0,0 +1,45 @@
+set hive.mapred.mode=nonstrict;
+
+drop table employee;
+
+create table employee (department_id int, gender varchar(10), education_level int);
+ 
+insert into employee values (1, 'M', 1),(1, 'M', 1),(2, 'F', 1),(1, 'F', 3),(1, 'M', 2),(4,
'M', 1),(2, 'F', 1),(2, 'F', 3),(3, 'M', 2),(null, 'M', 1),(null, null, 1),(null, null, null);
+
+explain select count(distinct department_id), count(distinct gender), count(distinct education_level)
from employee;
+
+select count(distinct gender), count(distinct department_id), count(distinct education_level)
from employee;
+
+select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct
education_level) from employee;
+
+select count(distinct department_id), count(distinct gender), count(distinct education_level)
from employee;
+
+select count(distinct department_id), count(distinct gender), count(distinct education_level),
count(distinct education_level) from employee;
+
+select count(distinct department_id), count(distinct gender), count(distinct education_level),

+count(distinct education_level, department_id) from employee;
+
+select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct
education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level)
from employee;
+
+explain select count(distinct gender), count(distinct department_id), count(distinct gender),
count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee;
+
+select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct
education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee;
+
+select 
+count(case when i=1 and department_id is not null then 1 else null end) as c0, 
+count(case when i=2 and gender is not null then 1 else null end) as c1, 
+count(case when i=4 and education_level is not null then 1 else null end) as c2 from 
+(select grouping__id as i, department_id, gender, 
+education_level from employee group by department_id, gender, education_level grouping sets

+(department_id, gender, education_level))subq;
+
+select grouping__id as i, department_id, gender, education_level from employee 
+group by department_id, gender, education_level grouping sets 
+(department_id, gender, education_level, (education_level, department_id));
+
+
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b6ebb2/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
new file mode 100644
index 0000000..6c13684
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
@@ -0,0 +1,312 @@
+PREHOOK: query: drop table employee
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table employee
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table employee (department_id int, gender varchar(10), education_level
int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@employee
+POSTHOOK: query: create table employee (department_id int, gender varchar(10), education_level
int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@employee
+PREHOOK: query: insert into employee values (1, 'M', 1),(1, 'M', 1),(2, 'F', 1),(1, 'F',
3),(1, 'M', 2),(4, 'M', 1),(2, 'F', 1),(2, 'F', 3),(3, 'M', 2),(null, 'M', 1),(null, null,
1),(null, null, null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@employee
+POSTHOOK: query: insert into employee values (1, 'M', 1),(1, 'M', 1),(2, 'F', 1),(1, 'F',
3),(1, 'M', 2),(4, 'M', 1),(2, 'F', 1),(2, 'F', 3),(3, 'M', 2),(null, 'M', 1),(null, null,
1),(null, null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@employee
+POSTHOOK: Lineage: employee.department_id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: employee.education_level EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: employee.gender EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: explain select count(distinct department_id), count(distinct gender), count(distinct
education_level) from employee
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct department_id), count(distinct gender), count(distinct
education_level) from employee
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: employee
+                  Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: department_id (type: int), gender (type: varchar(10)), education_level
(type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats:
NONE
+                    Group By Operator
+                      keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int),
0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: varchar(10)), _col2
(type: int), _col3 (type: int)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(10)),
_col2 (type: int), _col3 (type: int)
+                        Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column
stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: varchar(10)), KEY._col2 (type:
int), KEY._col3 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: CASE WHEN (((_col3 = 1) and _col0 is not null)) THEN (1) ELSE
(null) END (type: int), CASE WHEN (((_col3 = 2) and _col1 is not null)) THEN (1) ELSE (null)
END (type: int), CASE WHEN (((_col3 = 4) and _col2 is not null)) THEN (1) ELSE (null) END
(type: int)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats:
NONE
+                  Group By Operator
+                    aggregations: count(_col0), count(_col1), count(_col2)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2
(type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats:
NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
education_level) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
education_level) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+2	4	3
+PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+2	4	2	3
+PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+4	2	3
+PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level), count(distinct education_level) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level), count(distinct education_level) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+4	2	3	3
+PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level), 
+count(distinct education_level, department_id) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct
education_level), 
+count(distinct education_level, department_id) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+4	2	3	9
+PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level)
from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level)
from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+2	4	2	3	9	9
+PREHOOK: query: explain select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: employee
+                  Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: gender (type: varchar(10)), department_id (type: int), education_level
(type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats:
NONE
+                    Group By Operator
+                      keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int),
0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2
(type: int), _col3 (type: int)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: varchar(10)), _col1 (type:
int), _col2 (type: int), _col3 (type: int)
+                        Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column
stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: varchar(10)), KEY._col1 (type: int), KEY._col2 (type:
int), KEY._col3 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: CASE WHEN (((_col3 = 1) and _col0 is not null)) THEN (1) ELSE
(null) END (type: int), CASE WHEN (((_col3 = 2) and _col1 is not null)) THEN (1) ELSE (null)
END (type: int), CASE WHEN (((_col3 = 4) and _col2 is not null)) THEN (1) ELSE (null) END
(type: int), CASE WHEN ((_col3 = 6)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3
= 7)) THEN (1) ELSE (null) END (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats:
NONE
+                  Group By Operator
+                    aggregations: count(_col0), count(_col1), count(_col2), count(_col3),
count(_col4)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2
(type: bigint), _col3 (type: bigint), _col4 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2),
count(VALUE._col3), count(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: bigint), _col0 (type: bigint),
_col2 (type: bigint), _col3 (type: bigint), _col3 (type: bigint), _col4 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats:
NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats:
NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct
gender), count(distinct education_level),
+count(distinct education_level, department_id), count(distinct department_id, education_level),
count(distinct department_id, education_level, gender) from employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+2	4	2	3	9	9	10
+PREHOOK: query: select 
+count(case when i=1 and department_id is not null then 1 else null end) as c0, 
+count(case when i=2 and gender is not null then 1 else null end) as c1, 
+count(case when i=4 and education_level is not null then 1 else null end) as c2 from 
+(select grouping__id as i, department_id, gender, 
+education_level from employee group by department_id, gender, education_level grouping sets

+(department_id, gender, education_level))subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+count(case when i=1 and department_id is not null then 1 else null end) as c0, 
+count(case when i=2 and gender is not null then 1 else null end) as c1, 
+count(case when i=4 and education_level is not null then 1 else null end) as c2 from 
+(select grouping__id as i, department_id, gender, 
+education_level from employee group by department_id, gender, education_level grouping sets

+(department_id, gender, education_level))subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+4	2	3
+PREHOOK: query: select grouping__id as i, department_id, gender, education_level from employee

+group by department_id, gender, education_level grouping sets 
+(department_id, gender, education_level, (education_level, department_id))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+#### A masked pattern was here ####
+POSTHOOK: query: select grouping__id as i, department_id, gender, education_level from employee

+group by department_id, gender, education_level grouping sets 
+(department_id, gender, education_level, (education_level, department_id))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+#### A masked pattern was here ####
+1	NULL	NULL	NULL
+2	NULL	NULL	NULL
+4	NULL	NULL	NULL
+5	NULL	NULL	NULL
+4	NULL	NULL	1
+5	NULL	NULL	1
+4	NULL	NULL	2
+4	NULL	NULL	3
+2	NULL	F	NULL
+2	NULL	M	NULL
+1	1	NULL	NULL
+5	1	NULL	1
+5	1	NULL	2
+5	1	NULL	3
+1	2	NULL	NULL
+5	2	NULL	1
+5	2	NULL	3
+1	3	NULL	NULL
+5	3	NULL	2
+1	4	NULL	NULL
+5	4	NULL	1


Mime
View raw message