hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-13806: Extension to folding NOT expressions in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Sat, 04 Jun 2016 09:57:32 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 3ee49de19 -> df800bbe7


HIVE-13806: Extension to folding NOT expressions in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df800bbe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df800bbe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df800bbe

Branch: refs/heads/branch-2.1
Commit: df800bbe7a7780a3af9ff0a08cd6fa84e24a6bfa
Parents: 3ee49de
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Sat Jun 4 10:57:14 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Sat Jun 4 10:57:14 2016 +0100

----------------------------------------------------------------------
 .../hive/ql/optimizer/calcite/HiveRexUtil.java  | 20 ++++++++
 .../clientpositive/folder_predicate.q.out       | 50 ++++++++++----------
 .../clientpositive/rand_partitionpruner3.q.out  | 12 ++---
 .../results/clientpositive/union_offcbo.q.out   |  8 ++--
 4 files changed, 55 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/df800bbe/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
index a5dcffb..73a67a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
@@ -106,6 +106,26 @@ public class HiveRexUtil {
       return simplify(rexBuilder,
           rexBuilder.makeCall(op(negateKind2), ((RexCall) a).getOperands()));
     }
+    if (a.getKind() == SqlKind.AND) {
+      // NOT distributivity for AND
+      final List<RexNode> newOperands = new ArrayList<>();
+      for (RexNode operand : ((RexCall) a).getOperands()) {
+        newOperands.add(simplify(rexBuilder,
+            rexBuilder.makeCall(SqlStdOperatorTable.NOT, operand)));
+      }
+      return simplify(rexBuilder,
+          rexBuilder.makeCall(SqlStdOperatorTable.OR, newOperands));
+    }
+    if (a.getKind() == SqlKind.OR) {
+      // NOT distributivity for OR
+      final List<RexNode> newOperands = new ArrayList<>();
+      for (RexNode operand : ((RexCall) a).getOperands()) {
+        newOperands.add(simplify(rexBuilder,
+            rexBuilder.makeCall(SqlStdOperatorTable.NOT, operand)));
+      }
+      return simplify(rexBuilder,
+          rexBuilder.makeCall(SqlStdOperatorTable.AND, newOperands));
+    }
     return call;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/df800bbe/ql/src/test/results/clientpositive/folder_predicate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/folder_predicate.q.out b/ql/src/test/results/clientpositive/folder_predicate.q.out
index 48a4889..7fcc172 100644
--- a/ql/src/test/results/clientpositive/folder_predicate.q.out
+++ b/ql/src/test/results/clientpositive/folder_predicate.q.out
@@ -37,15 +37,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value = 3))) (type: boolean)
-              Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value <> 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -88,15 +88,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value >= 3))) (type: boolean)
-              Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value < 3)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -137,15 +137,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value <= 3))) (type: boolean)
-              Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value > 3)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -186,15 +186,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value > 3))) (type: boolean)
-              Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value <= 3)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -236,15 +236,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value < 3))) (type: boolean)
-              Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value >= 3)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats:
NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -286,15 +286,15 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value <> 3))) (type: boolean)
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              predicate: (value is null or (value = 3)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats:
NONE
+                Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats:
NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats:
NONE
+                  Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats:
NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -334,7 +334,7 @@ STAGE PLANS:
             alias: predicate_fold_tb
             Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not (value is not null and (value > 1) and (value <= 3))) (type: boolean)
+              predicate: (value is null or (value <= 1) or (value > 3)) (type: boolean)
               Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/df800bbe/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
index eabf9d9..9e2878f 100644
--- a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
+++ b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
@@ -65,12 +65,12 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: ((rand(1) < 0.1) and (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0)))) (type: boolean)
-            Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE
+            predicate: ((rand(1) < 0.1) and (UDFToDouble(key) <= 50.0) and (UDFToDouble(key) >= 10.0)) (type: boolean)
+            Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string), '2008-04-08' (type:
string), hr (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats:
NONE
+              Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats:
NONE
               ListSink
 
 PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08'
and not(key > 50 or key < 10) and a.hr like '%2'
@@ -153,12 +153,12 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0)))
(type: boolean)
-            Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats:
NONE
+            predicate: ((UDFToDouble(key) <= 50.0) and (UDFToDouble(key) >= 10.0))
(type: boolean)
+            Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string), '2008-04-08' (type:
string), hr (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats:
NONE
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats:
NONE
               ListSink
 
 PREHOOK: query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or
key < 10) and a.hr like '%2'

http://git-wip-us.apache.org/repos/asf/hive/blob/df800bbe/ql/src/test/results/clientpositive/union_offcbo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_offcbo.q.out b/ql/src/test/results/clientpositive/union_offcbo.q.out
index 71c3bfc..c71d53f 100644
--- a/ql/src/test/results/clientpositive/union_offcbo.q.out
+++ b/ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -629,7 +629,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: ((CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >=
'2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05')))
THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA')
END <> 'RET') and (not ((NVL(_col0,-1) = NVL(_col7,-1)) and (NVL(_col1,-1) = NVL(_col8,-1)))))
(type: boolean)
+            predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <>
NVL(_col8,-1))) and (CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05')))
THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN
('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END
<> 'RET')) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint),
_col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not
null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null
and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and
(_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string)
@@ -719,7 +719,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: ((CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <=
'2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3)
and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col3,-1)
= NVL(_col6,-1)) and (NVL(_col4,-1) = NVL(_col7,-1))))) (type: boolean)
+            predicate: (((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <>
NVL(_col7,-1))) and (CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20')))
THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <>
_col4)))) THEN ('INS') ELSE ('NA') END <> 'RET')) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint),
'2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6
is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6
is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS')
ELSE ('NA') END (type: string)
@@ -1652,7 +1652,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: ((CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >=
'2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05')))
THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA')
END <> 'RET') and (not ((NVL(_col0,-1) = NVL(_col7,-1)) and (NVL(_col1,-1) = NVL(_col8,-1)))))
(type: boolean)
+            predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <>
NVL(_col8,-1))) and (CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05')))
THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN
('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END
<> 'RET')) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint),
_col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not
null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null
and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and
(_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string)
@@ -1742,7 +1742,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: ((CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <=
'2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3)
and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col3,-1)
= NVL(_col6,-1)) and (NVL(_col4,-1) = NVL(_col7,-1))))) (type: boolean)
+            predicate: (((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <>
NVL(_col7,-1))) and (CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20')))
THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <>
_col4)))) THEN ('INS') ELSE ('NA') END <> 'RET')) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint),
'2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6
is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6
is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS')
ELSE ('NA') END (type: string)


Mime
View raw message