impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mjac...@apache.org
Subject [5/5] incubator-impala git commit: IMPALA-5280: Coalesce chains of OR conditions to an IN predicate
Date Thu, 29 Jun 2017 16:23:03 GMT
IMPALA-5280: Coalesce chains of OR conditions to an IN predicate

This change introduces a new rule to merge disjunctive equality
predicates into an IN predicate. Because every rule is applied
bottom up, the rule first merges the leaf OR predicates into an IN
predicate and subsequently merges each enclosing OR predicate into the
existing IN predicate. It will also merge two compatible IN predicates
into a single IN predicate.

The patch also addresses review comments by normalizing binary
predicates and adding test cases for the normalization. Binary
predicates of the form <constant> <op> <non-constant> are normalized
to <non-constant> <op> <constant>.

Change-Id: If02396b752c5497de9a92828c24c8062027dc2e2
Reviewed-on: http://gerrit.cloudera.org:8080/7110
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/536a0612
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/536a0612
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/536a0612

Branch: refs/heads/master
Commit: 536a0612acce9987a990df2d8ff483ed66438146
Parents: 6311f39
Author: sandeep akinapelli <sakinapelli@cloudera.com>
Authored: Wed Jun 7 15:48:00 2017 -0700
Committer: Impala Public Jenkins <impala-public-jenkins@gerrit.cloudera.org>
Committed: Thu Jun 29 08:57:29 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/Analyzer.java    |   2 +
 .../java/org/apache/impala/analysis/Expr.java   |  11 ++
 .../rewrite/EqualityDisjunctsToInRule.java      | 109 +++++++++++++++
 .../rewrite/NormalizeBinaryPredicatesRule.java  |  30 +++--
 .../impala/analysis/ExprRewriteRulesTest.java   | 131 +++++++++++++++++--
 .../PlannerTest/constant-propagation.test       |   2 +-
 .../queries/PlannerTest/kudu-selectivity.test   |  12 +-
 .../queries/PlannerTest/subquery-rewrite.test   |   4 +-
 .../queries/PlannerTest/tpcds-all.test          |  12 +-
 .../queries/PlannerTest/tpch-all.test           |  10 +-
 .../queries/PlannerTest/tpch-kudu.test          |   2 +-
 .../queries/PlannerTest/tpch-nested.test        |   6 +-
 .../queries/PlannerTest/tpch-views.test         |   2 +-
 13 files changed, 284 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index e492aea..3527b85 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -59,6 +59,7 @@ import org.apache.impala.common.PrintUtils;
 import org.apache.impala.common.RuntimeEnv;
 import org.apache.impala.planner.PlanNode;
 import org.apache.impala.rewrite.BetweenToCompoundRule;
+import org.apache.impala.rewrite.EqualityDisjunctsToInRule;
 import org.apache.impala.rewrite.ExprRewriter;
 import org.apache.impala.rewrite.ExprRewriteRule;
 import org.apache.impala.rewrite.ExtractCommonConjunctRule;
@@ -341,6 +342,7 @@ public class Analyzer {
         rules.add(ExtractCommonConjunctRule.INSTANCE);
         // Relies on FoldConstantsRule and NormalizeExprsRule.
         rules.add(SimplifyConditionalsRule.INSTANCE);
+        rules.add(EqualityDisjunctsToInRule.INSTANCE);
       }
       exprRewriter_ = new ExprRewriter(rules);
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/fe/src/main/java/org/apache/impala/analysis/Expr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 6d6442a..fdb166d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.ListIterator;
 import java.util.Set;
 
+import org.apache.impala.analysis.BinaryPredicate.Operator;
 import org.apache.impala.analysis.BoolLiteral;
 import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.Function;
@@ -176,6 +177,16 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode,
Cloneabl
         public boolean apply(Expr arg) { return arg instanceof BinaryPredicate; }
       };
 
+  public final static com.google.common.base.Predicate<Expr> IS_EXPR_EQ_LITERAL_PREDICATE
=
+      new com.google.common.base.Predicate<Expr>() {
+    @Override
+    public boolean apply(Expr arg) {
+      return arg instanceof BinaryPredicate
+          && ((BinaryPredicate) arg).getOp() == Operator.EQ
+          && (((BinaryPredicate) arg).getChild(1).isLiteral());
+    }
+  };
+
   public final static com.google.common.base.Predicate<Expr>
       IS_NONDETERMINISTIC_BUILTIN_FN_PREDICATE =
       new com.google.common.base.Predicate<Expr>() {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/fe/src/main/java/org/apache/impala/rewrite/EqualityDisjunctsToInRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/EqualityDisjunctsToInRule.java b/fe/src/main/java/org/apache/impala/rewrite/EqualityDisjunctsToInRule.java
new file mode 100644
index 0000000..fa299df
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/rewrite/EqualityDisjunctsToInRule.java
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.rewrite;
+
+import com.google.common.collect.Lists;
+
+import java.util.List;
+
+import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.InPredicate;
+import org.apache.impala.analysis.Subquery;
+import org.apache.impala.common.AnalysisException;
+
+/**
+ * Coalesces disjunctive equality predicates to an IN predicate, and merges compatible
+ * equality or IN predicates into an existing IN predicate.
+ * Examples:
+ * (C=1) OR (C=2) OR (C=3) OR (C=4) -> C IN(1, 2, 3, 4)
+ * (X+Y = 5) OR (X+Y = 6) -> X+Y IN (5, 6)
+ * (A = 1) OR (A IN (2, 3)) -> A IN (1, 2, 3)
+ * (B IN (1, 2)) OR (B IN (3, 4)) -> B IN (1, 2, 3, 4)
+ */
+public class EqualityDisjunctsToInRule implements ExprRewriteRule {
+
+  public static ExprRewriteRule INSTANCE = new EqualityDisjunctsToInRule();
+
+  @Override
+  public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+    if (!Expr.IS_OR_PREDICATE.apply(expr)) return expr;
+
+    Expr inAndOtherExpr = rewriteInAndOtherExpr(expr.getChild(0), expr.getChild(1));
+    if (inAndOtherExpr != null) return inAndOtherExpr;
+
+    Expr orChildExpr = rewriteEqEqPredicate(expr.getChild(0), expr.getChild(1));
+    if (orChildExpr != null) return orChildExpr;
+
+    return expr;
+  }
+
+  /**
+   * Takes the children of an OR predicate and attempts to combine them into a single IN predicate.
+   * The transformation is applied if one of the children is an IN predicate and the other child
+   * is a compatible IN predicate or equality predicate. Returns the transformed expr or null
+   * if no transformation was possible.
+   */
+  private Expr rewriteInAndOtherExpr(Expr child0, Expr child1) {
+    InPredicate inPred = null;
+    Expr otherPred = null;
+    if (child0 instanceof InPredicate) {
+      inPred = (InPredicate) child0;
+      otherPred = child1;
+    }
+    else if (child1 instanceof InPredicate) {
+      inPred = (InPredicate) child1;
+      otherPred = child0;
+    }
+    if (inPred == null || inPred.isNotIn() || inPred.contains(Subquery.class)) return null;
+    if (!inPred.getChild(0).equals(otherPred.getChild(0))) return null;
+
+    // other predicate can be an equality-with-literal predicate or an IN predicate
+    List<Expr> newInList = Lists.newArrayList(
+        inPred.getChildren().subList(1, inPred.getChildren().size()));
+    if (Expr.IS_EXPR_EQ_LITERAL_PREDICATE.apply(otherPred)) {
+      newInList.add(otherPred.getChild(1));
+    } else
+      if (otherPred instanceof InPredicate && !((InPredicate) otherPred).isNotIn()
+          && !otherPred.contains(Subquery.class)) {
+        newInList.addAll(
+            otherPred.getChildren().subList(1, otherPred.getChildren().size()));
+      } else {
+        return null;
+      }
+
+    return new InPredicate(inPred.getChild(0), newInList, false);
+  }
+
+  /**
+   * Takes the children of an OR predicate and attempts to combine them into a single IN predicate.
+   * The transformation is applied if both children are equality predicates with a literal on the
+   * right hand side.
+   * Returns the transformed expr or null if no transformation was possible.
+   */
+  private Expr rewriteEqEqPredicate(Expr child0, Expr child1) {
+    if (!Expr.IS_EXPR_EQ_LITERAL_PREDICATE.apply(child0)) return null;
+    if (!Expr.IS_EXPR_EQ_LITERAL_PREDICATE.apply(child1)) return null;
+
+    if (!child0.getChild(0).equals(child1.getChild(0))) return null;
+    Expr newExpr = new InPredicate(child0.getChild(0),
+        Lists.newArrayList(child0.getChild(1), child1.getChild(1)), false);
+    return newExpr;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/fe/src/main/java/org/apache/impala/rewrite/NormalizeBinaryPredicatesRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/NormalizeBinaryPredicatesRule.java
b/fe/src/main/java/org/apache/impala/rewrite/NormalizeBinaryPredicatesRule.java
index 3a2ac57..34d7927 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/NormalizeBinaryPredicatesRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/NormalizeBinaryPredicatesRule.java
@@ -19,19 +19,20 @@ package org.apache.impala.rewrite;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.BinaryPredicate;
-import org.apache.impala.analysis.BoolLiteral;
-import org.apache.impala.analysis.CompoundPredicate;
 import org.apache.impala.analysis.Expr;
-import org.apache.impala.analysis.SlotRef;
 import org.apache.impala.common.AnalysisException;
 
 /**
-   * Normalizes binary predicates of the form <expr> <op> <slot> so that the slot is
-   * on the left hand side. Predicates where <slot> is wrapped in a cast (implicit or
-   * explicit) are normalized, too.
+ * Normalizes binary predicates of the form <expr> <op> <slot> so that the slot is
+ * on the left hand side. Predicates where <slot> is wrapped in a cast (implicit or
+ * explicit) are normalized, too. Predicates of the form <constant> <op> <expr>
+ * are also normalized so that <constant> is always on the right hand side.
  *
  * Examples:
  * 5 > id -> id < 5
+ * cast(0 as double) = id -> id = cast(0 as double)
+ * 5 = id + 2 -> id + 2 = 5
+ *
  */
 public class NormalizeBinaryPredicatesRule implements ExprRewriteRule {
   public static ExprRewriteRule INSTANCE = new NormalizeBinaryPredicatesRule();
@@ -39,11 +40,20 @@ public class NormalizeBinaryPredicatesRule implements ExprRewriteRule
{
   @Override
   public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
     if (!(expr instanceof BinaryPredicate)) return expr;
-    if (expr.getChild(0).unwrapSlotRef(false) != null) return expr;
-    if (expr.getChild(1).unwrapSlotRef(false) == null) return expr;
 
-    BinaryPredicate.Operator op = ((BinaryPredicate) expr).getOp();
+    if (isExprOpSlotRef(expr) || isConstantOpExpr(expr)) {
+      BinaryPredicate.Operator op = ((BinaryPredicate) expr).getOp();
+      return new BinaryPredicate(op.converse(), expr.getChild(1), expr.getChild(0));
+    }
+    return expr;
+  }
+
+  boolean isConstantOpExpr(Expr expr) {
+    return expr.getChild(0).isConstant() && !expr.getChild(1).isConstant();
+  }
 
-    return new BinaryPredicate(op.converse(), expr.getChild(1), expr.getChild(0));
+  boolean isExprOpSlotRef(Expr expr) {
+    return expr.getChild(0).unwrapSlotRef(false) == null
+        && expr.getChild(1).unwrapSlotRef(false) != null;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 9650f76..d20aedf 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -23,6 +23,7 @@ import org.apache.impala.catalog.Catalog;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.rewrite.BetweenToCompoundRule;
+import org.apache.impala.rewrite.EqualityDisjunctsToInRule;
 import org.apache.impala.rewrite.SimplifyConditionalsRule;
 import org.apache.impala.rewrite.ExprRewriteRule;
 import org.apache.impala.rewrite.ExprRewriter;
@@ -40,35 +41,68 @@ import com.google.common.collect.Lists;
  */
 public class ExprRewriteRulesTest extends FrontendTestBase {
 
-  public Expr RewritesOk(String expr, ExprRewriteRule rule, String expectedExpr)
+  public Expr RewritesOk(String exprStr, ExprRewriteRule rule, String expectedExprStr)
       throws AnalysisException {
-    return RewritesOk("functional.alltypessmall", expr, rule, expectedExpr);
+    return RewritesOk("functional.alltypessmall", exprStr, rule, expectedExprStr);
   }
 
-  public Expr RewritesOk(String tableName, String expr, ExprRewriteRule rule, String expectedExpr)
+  public Expr RewritesOk(String tableName, String exprStr, ExprRewriteRule rule, String expectedExprStr)
       throws AnalysisException {
-    return RewritesOk(tableName, expr, Lists.newArrayList(rule), expectedExpr);
+    return RewritesOk(tableName, exprStr, Lists.newArrayList(rule), expectedExprStr);
   }
 
-  public Expr RewritesOk(String expr, List<ExprRewriteRule> rules, String expectedExpr)
+  public Expr RewritesOk(String exprStr, List<ExprRewriteRule> rules, String expectedExprStr)
       throws AnalysisException {
-    return RewritesOk("functional.alltypessmall", expr, rules, expectedExpr);
+    return RewritesOk("functional.alltypessmall", exprStr, rules, expectedExprStr);
   }
 
-  public Expr RewritesOk(String tableName, String expr, List<ExprRewriteRule> rules,
-      String expectedExpr) throws AnalysisException {
-    String stmtStr = "select " + expr + " from " + tableName;
+  public Expr RewritesOk(String tableName, String exprStr, List<ExprRewriteRule> rules,
+      String expectedExprStr) throws AnalysisException {
+    String stmtStr = "select " + exprStr + " from " + tableName;
     SelectStmt stmt = (SelectStmt) ParsesOk(stmtStr);
     Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB);
     stmt.analyze(analyzer);
     Expr origExpr = stmt.getSelectList().getItems().get(0).getExpr();
+    Expr rewrittenExpr = verifyExprEquivalence(origExpr, expectedExprStr, rules, analyzer);
+    return rewrittenExpr;
+  }
+
+  public Expr RewritesOkWhereExpr(String exprStr, ExprRewriteRule rule, String expectedExprStr)
+      throws AnalysisException {
+    return RewritesOkWhereExpr("functional.alltypessmall", exprStr, rule, expectedExprStr);
+  }
+
+  public Expr RewritesOkWhereExpr(String tableName, String exprStr, ExprRewriteRule rule,
String expectedExprStr)
+      throws AnalysisException {
+    return RewritesOkWhereExpr(tableName, exprStr, Lists.newArrayList(rule), expectedExprStr);
+  }
+
+  public Expr RewritesOkWhereExpr(String exprStr, List<ExprRewriteRule> rules, String
expectedExprStr)
+      throws AnalysisException {
+    return RewritesOkWhereExpr("functional.alltypessmall", exprStr, rules, expectedExprStr);
+  }
+
+  public Expr RewritesOkWhereExpr(String tableName, String exprStr, List<ExprRewriteRule>
rules,
+      String expectedExprStr) throws AnalysisException {
+    String stmtStr = "select count(1)  from " + tableName + " where " + exprStr;
+    System.out.println(stmtStr);
+    SelectStmt stmt = (SelectStmt) ParsesOk(stmtStr);
+    Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB);
+    stmt.analyze(analyzer);
+    Expr origExpr = stmt.getWhereClause();
+    Expr rewrittenExpr = verifyExprEquivalence(origExpr, expectedExprStr, rules, analyzer);
+    return rewrittenExpr;
+  }
+
+  private Expr verifyExprEquivalence(Expr origExpr, String expectedExprStr,
+      List<ExprRewriteRule> rules, Analyzer analyzer) throws AnalysisException {
     String origSql = origExpr.toSql();
     ExprRewriter rewriter = new ExprRewriter(rules);
     Expr rewrittenExpr = rewriter.rewrite(origExpr, analyzer);
     String rewrittenSql = rewrittenExpr.toSql();
-    boolean expectChange = expectedExpr != null;
-    if (expectedExpr != null) {
-      assertEquals(expectedExpr, rewrittenSql);
+    boolean expectChange = expectedExprStr != null;
+    if (expectedExprStr != null) {
+      assertEquals(expectedExprStr, rewrittenSql);
     } else {
       assertEquals(origSql, rewrittenSql);
     }
@@ -76,6 +110,7 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     return rewrittenExpr;
   }
 
+
   /**
    * Helper for prettier error messages than what JUnit.Assert provides.
    */
@@ -398,13 +433,81 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     RewritesOk("0 = id", rule, "id = 0");
     RewritesOk("cast(0 as double) = id", rule, "id = CAST(0 AS DOUBLE)");
     RewritesOk("1 + 1 = cast(id as int)", rule, "CAST(id AS INT) = 1 + 1");
+    RewritesOk("5 = id + 2", rule, "id + 2 = 5");
+    RewritesOk("5 + 3 = id", rule, "id = 5 + 3");
+    RewritesOk("tinyint_col + smallint_col = int_col", rule,
+        "int_col = tinyint_col + smallint_col");
+
 
     // Verify that these don't get rewritten.
+    RewritesOk("5 = 6", rule, null);
     RewritesOk("id = 5", rule, null);
-    RewritesOk("5 = id + 2", rule, null);
     RewritesOk("cast(id as int) = int_col", rule, null);
     RewritesOk("int_col = cast(id as int)", rule, null);
     RewritesOk("int_col = tinyint_col", rule, null);
     RewritesOk("tinyint_col = int_col", rule, null);
   }
-}
\ No newline at end of file
+
+  @Test
+  public void TestEqualityDisjunctsToInRule() throws AnalysisException {
+    ExprRewriteRule edToInrule = EqualityDisjunctsToInRule.INSTANCE;
+    ExprRewriteRule normalizeRule = NormalizeBinaryPredicatesRule.INSTANCE;
+    List<ExprRewriteRule> comboRules = Lists.newArrayList(normalizeRule,
+        edToInrule);
+
+    RewritesOk("int_col = 1 or int_col = 2", edToInrule, "int_col IN (1, 2)");
+    RewritesOk("int_col = 1 or int_col = 2 or int_col = 3", edToInrule,
+        "int_col IN (1, 2, 3)");
+    RewritesOk("(int_col = 1 or int_col = 2) or (int_col = 3 or int_col = 4)", edToInrule,
+        "int_col IN (1, 2, 3, 4)");
+    RewritesOk("float_col = 1.1 or float_col = 2.2 or float_col = 3.3",
+        edToInrule, "float_col IN (1.1, 2.2, 3.3)");
+    RewritesOk("string_col = '1' or string_col = '2' or string_col = '3'",
+        edToInrule, "string_col IN ('1', '2', '3')");
+    RewritesOk("bool_col = true or bool_col = false or bool_col = true", edToInrule,
+        "bool_col IN (TRUE, FALSE, TRUE)");
+    RewritesOk("bool_col = null or bool_col = null or bool_col is null", edToInrule,
+        "bool_col IN (NULL, NULL) OR bool_col IS NULL");
+    RewritesOk("int_col * 3 = 6 or int_col * 3 = 9 or int_col * 3 = 12",
+        edToInrule, "int_col * 3 IN (6, 9, 12)");
+
+    // cases where rewrite should happen partially
+    RewritesOk("(int_col = 1 or int_col = 2) or (int_col = 3 and int_col = 4)",
+        edToInrule, "int_col IN (1, 2) OR (int_col = 3 AND int_col = 4)");
+    RewritesOk(
+        "1 = int_col or 2 = int_col or 3 = int_col AND (float_col = 5 or float_col = 6)",
+        edToInrule,
+        "1 = int_col OR 2 = int_col OR 3 = int_col AND float_col IN (5, 6)");
+    RewritesOk("int_col * 3 = 6 or int_col * 3 = 9 or int_col * 3 <= 12",
+        edToInrule, "int_col * 3 IN (6, 9) OR int_col * 3 <= 12");
+
+    // combo rules
+    RewritesOk(
+        "1 = int_col or 2 = int_col or 3 = int_col AND (float_col = 5 or float_col = 6)",
+        comboRules, "int_col IN (1, 2) OR int_col = 3 AND float_col IN (5, 6)");
+
+    // existing in predicate
+    RewritesOk("int_col in (1,2) or int_col = 3", edToInrule,
+        "int_col IN (1, 2, 3)");
+    RewritesOk("int_col = 1 or int_col in (2, 3)", edToInrule,
+        "int_col IN (2, 3, 1)");
+    RewritesOk("int_col in (1, 2) or int_col in (3, 4)", edToInrule,
+        "int_col IN (1, 2, 3, 4)");
+
+    // no rewrite
+    RewritesOk("int_col = smallint_col or int_col = bigint_col ", edToInrule, null);
+    RewritesOk("int_col = 1 or int_col = int_col ", edToInrule, null);
+    RewritesOk("int_col = 1 or int_col = int_col + 3 ", edToInrule, null);
+    RewritesOk("int_col in (1, 2) or int_col = int_col + 3 ", edToInrule, null);
+    RewritesOk("int_col not in (1,2) or int_col = 3", edToInrule, null);
+    RewritesOk("int_col = 3 or int_col not in (1,2)", edToInrule, null);
+    RewritesOk("int_col not in (1,2) or int_col not in (3, 4)", edToInrule, null);
+    RewritesOk("int_col in (1,2) or int_col not in (3, 4)", edToInrule, null);
+
+    // TODO if subqueries are supported in OR clause in future, add tests to cover the same.
+    RewritesOkWhereExpr(
+        "int_col = 1 and int_col in "
+            + "(select smallint_col from functional.alltypessmall where smallint_col<10)",
+        edToInrule, null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
index f9b9977..067698a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
@@ -52,7 +52,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.widetable_250_cols a]
    partitions=1/1 files=1 size=28.69KB
-   predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, 55 = a.int_col4 / 10
+   predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 / 10 = 55
 ====
 # Another impossibility (a.int_col3 = a.int_col2 * 5 = a.int_col2 * -7)
 select * from functional.widetable_250_cols a

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index 3dd5168..48e98af 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -82,16 +82,16 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
      mem-estimate=0B mem-reservation=0B
      tuple-ids=0 row-size=124B cardinality=3317
 ====
-select * from functional_kudu.zipcode_incomes where id = '1' or id = '2'
+select * from functional_kudu.zipcode_incomes where id = '1' or id = '2' or zip = '3'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
   PLAN-ROOT SINK
   |  mem-estimate=0B mem-reservation=0B
   |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     predicates: id = '1' OR id = '2'
+     predicates: id IN ('1', '2') OR zip = '3'
      mem-estimate=0B mem-reservation=0B
-     tuple-ids=0 row-size=124B cardinality=2
+     tuple-ids=0 row-size=124B cardinality=3
 ---- DISTRIBUTEDPLAN
 F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
   PLAN-ROOT SINK
@@ -99,15 +99,15 @@ F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
   |
   01:EXCHANGE [UNPARTITIONED]
      mem-estimate=0B mem-reservation=0B
-     tuple-ids=0 row-size=124B cardinality=2
+     tuple-ids=0 row-size=124B cardinality=3
 
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
   DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
   |  mem-estimate=0B mem-reservation=0B
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     predicates: id = '1' OR id = '2'
+     predicates: id IN ('1', '2') OR zip = '3'
      mem-estimate=0B mem-reservation=0B
-     tuple-ids=0 row-size=124B cardinality=2
+     tuple-ids=0 row-size=124B cardinality=3
 ====
 select * from functional_kudu.alltypes where
 -- predicates that can be pushed

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index ee8f59a..9823a8b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -1231,7 +1231,7 @@ PLAN-ROOT SINK
 |
 |--02:AGGREGATE [FINALIZE]
 |  |  output: count(*)
-|  |  having: 100 < count(*)
+|  |  having: count(*) > 100
 |  |
 |  01:SCAN HDFS [functional.alltypes]
 |     partitions=24/24 files=24 size=478.45KB
@@ -1256,7 +1256,7 @@ PLAN-ROOT SINK
 |
 |--03:AGGREGATE [FINALIZE]
 |  |  output: count(id)
-|  |  having: 100 > count(id)
+|  |  having: count(id) < 100
 |  |
 |  02:AGGREGATE
 |  |  group by: id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index e944500..ea076e4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -1205,7 +1205,7 @@ PLAN-ROOT SINK
 |  |
 |  |--03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, (CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END) > 1.2, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), (CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END)
> 1.2
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB
@@ -1269,7 +1269,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, (CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END) > 1.2, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), (CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END)
> 1.2
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB
@@ -1349,7 +1349,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, (CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END) > 1.2, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), (CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END)
> 1.2
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB
@@ -3761,7 +3761,7 @@ PLAN-ROOT SINK
 |  |
 |  |--03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END > 1, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END
> 1
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB
@@ -3824,7 +3824,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END > 1, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END
> 1
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB
@@ -3903,7 +3903,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  03:SCAN HDFS [tpcds.household_demographics]
 |  |     partitions=1/1 files=1 size=141.07KB
-|  |     predicates: household_demographics.hd_vehicle_count > 0, CASE WHEN household_demographics.hd_vehicle_count
> 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
ELSE NULL END > 1, (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential
= 'unknown')
+|  |     predicates: household_demographics.hd_vehicle_count > 0, household_demographics.hd_buy_potential
IN ('>10000', 'unknown'), CASE WHEN household_demographics.hd_vehicle_count > 0 THEN
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END
> 1
 |  |
 |  00:SCAN HDFS [tpcds.store_sales]
 |     partitions=1824/1824 files=1824 size=326.32MB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index 219e5a9..4713ff3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -2353,7 +2353,7 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 02:HASH JOIN [INNER JOIN]
@@ -2377,13 +2377,13 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 08:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH'
THEN 1 ELSE 0 END), sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority
!= '2-HIGH' THEN 1 ELSE 0 END)
+|  output: sum:merge(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END),
sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE
0 END)
 |  group by: l_shipmode
 |
 07:EXCHANGE [HASH(l_shipmode)]
 |
 03:AGGREGATE [STREAMING]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -2411,13 +2411,13 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 08:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH'
THEN 1 ELSE 0 END), sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority
!= '2-HIGH' THEN 1 ELSE 0 END)
+|  output: sum:merge(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END),
sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE
0 END)
 |  group by: l_shipmode
 |
 07:EXCHANGE [HASH(l_shipmode)]
 |
 03:AGGREGATE [STREAMING]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index 950fb90..2dde552 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -718,7 +718,7 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 02:HASH JOIN [INNER JOIN]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index 7b9ff13..4611575 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -1478,7 +1478,7 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 05:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 01:SUBPLAN
@@ -1503,13 +1503,13 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 08:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH'
THEN 1 ELSE 0 END), sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority
!= '2-HIGH' THEN 1 ELSE 0 END)
+|  output: sum:merge(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END),
sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE
0 END)
 |  group by: l_shipmode
 |
 07:EXCHANGE [HASH(l_shipmode)]
 |
 05:AGGREGATE [STREAMING]
-|  output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN
1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH'
THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE
WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: l_shipmode
 |
 01:SUBPLAN

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/536a0612/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index c927b8b..1345cab 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -853,7 +853,7 @@ PLAN-ROOT SINK
 |  order by: l_shipmode ASC
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN tpch.orders.o_orderpriority = '1-URGENT' OR tpch.orders.o_orderpriority
= '2-HIGH' THEN 1 ELSE 0 END), sum(CASE WHEN tpch.orders.o_orderpriority != '1-URGENT' AND
tpch.orders.o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
+|  output: sum(CASE WHEN tpch.orders.o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE
0 END), sum(CASE WHEN tpch.orders.o_orderpriority != '1-URGENT' AND tpch.orders.o_orderpriority
!= '2-HIGH' THEN 1 ELSE 0 END)
 |  group by: tpch.lineitem.l_shipmode
 |
 02:HASH JOIN [INNER JOIN]



Mime
View raw message