hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-12223: Filter on Grouping__ID does not work properly (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Thu, 05 Nov 2015 08:39:36 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 8d287c118 -> 967f5c201


HIVE-12223: Filter on Grouping__ID does not work properly (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/967f5c20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/967f5c20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/967f5c20

Branch: refs/heads/branch-1
Commit: 967f5c201bf4782b1ceb3b85d3faf03829887d1a
Parents: 8d287c1
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Nov 5 09:38:59 2015 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Thu Nov 5 09:38:59 2015 +0100

----------------------------------------------------------------------
 .../hive/ql/ppd/ExprWalkerProcFactory.java      | 12 ++++
 .../clientpositive/groupby_grouping_id3.q       | 22 +++++++
 .../clientpositive/groupby_grouping_id3.q.out   | 60 ++++++++++++++++++++
 3 files changed, 94 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/967f5c20/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 4df33cb..fbd6739 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -84,6 +85,17 @@ public final class ExprWalkerProcFactory {
         // replace the output expression with the input expression so that
         // parent op can understand this expression
         ExprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn());
+        // if the operator is a groupby and we are referencing the grouping
+        // id column, we cannot push the predicate
+        if (op instanceof GroupByOperator) {
+          GroupByOperator groupBy = (GroupByOperator) op;
+          if (groupBy.getConf().isGroupingSetsPresent()) {
+            int groupingSetPlaceholderPos = groupBy.getConf().getKeys().size() - 1;
+            if (colref.getColumn().equals(groupBy.getSchema().getColumnNames().get(groupingSetPlaceholderPos))) {
+              exp = null;
+            }
+          }
+        }
         if (exp == null) {
           // means that expression can't be pushed either because it is value in
           // group by

http://git-wip-us.apache.org/repos/asf/hive/blob/967f5c20/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
new file mode 100644
index 0000000..c6746a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
@@ -0,0 +1,22 @@
+CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
+
+set hive.cbo.enable = false;
+
+-- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1;
+
+set hive.cbo.enable = true;
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/967f5c20/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
new file mode 100644
index 0000000..c305bfd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
@@ -0,0 +1,60 @@
+PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	NULL	1	2
+2	NULL	1	1
+3	NULL	1	2
+4	NULL	1	1
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	NULL	1	2
+2	NULL	1	1
+3	NULL	1	2
+4	NULL	1	1


Mime
View raw message