From: sershe@apache.org
To: commits@hive.apache.org
Date: Wed, 31 Aug 2016 22:43:20 -0000
Message-Id: <808cb97f967c4e50a20ae7a68c279548@git.apache.org>
Subject: [2/2] hive git commit: HIVE-14652 : incorrect results for not in on partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)

HIVE-14652 : incorrect results for not in on partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9b39504
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9b39504
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9b39504

Branch: refs/heads/branch-2.1
Commit: b9b395046f11f427fad0a6f5f7b50bb6db6e354e
Parents: 8a25c54
Author: Sergey Shelukhin
Authored: Wed Aug 31 15:36:47 2016 -0700
Committer: Sergey Shelukhin
Committed: Wed Aug 31 15:42:50 2016 -0700

----------------------------------------------------------------------
 .../ql/optimizer/pcr/PcrExprProcFactory.java    | 423 +++++++++----------
 .../hadoop/hive/ql/parse/ParseContext.java      |  25 ++
 .../partition_condition_remover.q               |  13 +
 .../partition_condition_remover.q.out           |  79 ++++
 ql/src/test/results/clientpositive/pcs.q.out    |   4 +-
 5 files changed, 330 insertions(+), 214 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index f9388e2..461dbe5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.optimizer.pcr;
 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -185,7 +187,7 @@ public final class PcrExprProcFactory {
   }
 
   public enum WalkState {
-    PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED
+    PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED, PART_COL_STRUCT
   }
 
   public static class NodeInfoWrapper {
@@ -253,242 +255,239 @@ public final class PcrExprProcFactory {
         Object... nodeOutputs) throws SemanticException {
       PcrExprProcCtx ctx = (PcrExprProcCtx) procCtx;
       ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+      if (LOG.isDebugEnabled()) {
+        String err = "Processing " + fd.getExprString() + " "
+            + fd.getGenericUDF().getUdfName() + " outputs ";
+        for (Object child : nodeOutputs) {
+          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+          err += "{" + wrapper.state + ", " + wrapper.outExpr + "}, ";
+        }
+        LOG.debug(err);
+      }
       if (FunctionRegistry.isOpNot(fd)) {
-        assert (nodeOutputs.length == 1);
-        NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
-        if (wrapper.state == WalkState.TRUE) {
-          ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
-              wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
-          return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
-        } else if (wrapper.state == WalkState.FALSE) {
-          ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
-              wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
-          return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
-        } else if (wrapper.state == WalkState.DIVIDED) {
+        return handleUdfNot(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isOpAnd(fd)) {
+        return handleUdfAnd(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isOpOr(fd)) {
+        return handleUdfOr(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isIn(fd)) {
+        List children = fd.getChildren();
+        // We should not remove the dynamic partition pruner generated synthetic predicates.
+        for (int i = 1; i < children.size(); i++) {
+          if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+          }
+        }
+        // Otherwise, handle like a normal generic UDF.
+        return handleDeterministicUdf(ctx, fd, nodeOutputs);
+      } else if (fd.getGenericUDF() instanceof GenericUDFStruct) {
+        // Handle structs composed of partition columns,
+        for (Object child : nodeOutputs) {
+          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+          if (wrapper.state != WalkState.PART_COL) {
+            return handleDeterministicUdf(ctx, fd, nodeOutputs); // Giving up.
+          }
+        }
+        return new NodeInfoWrapper(WalkState.PART_COL_STRUCT, null, getOutExpr(fd, nodeOutputs));
+      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+        // If it's a non-deterministic UDF, set unknown to true
+        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+      } else {
+        return handleDeterministicUdf(ctx, fd, nodeOutputs);
+      }
+    }
+
+    private Object handleDeterministicUdf(PcrExprProcCtx ctx,
+        ExprNodeGenericFuncDesc fd, Object... nodeOutputs)
+        throws SemanticException {
+      Boolean has_part_col = checkForPartColsAndUnknown(fd, nodeOutputs);
+      if (has_part_col == null) {
+        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+      }
+
+      if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
+        // we need to evaluate result for every pruned partition
+        if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
+          // if the return type of the GenericUDF is boolean and all partitions agree on
+          // a result, we update the state of the node to be TRUE of FALSE
           Boolean[] results = new Boolean[ctx.getPartList().size()];
           for (int i = 0; i < ctx.getPartList().size(); i++) {
-            results[i] = opNot(wrapper.ResultVector[i]);
+            results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
+                ctx.getVirtualColumns());
           }
-          return new NodeInfoWrapper(WalkState.DIVIDED, results,
-              getOutExpr(fd, nodeOutputs));
+          return getResultWrapFromResults(results, fd, nodeOutputs);
+        }
+
+        // the case that return type of the GenericUDF is not boolean, and if not all partition
+        // agree on result, we make the node UNKNOWN. If they all agree, we replace the node
+        // to be a CONSTANT node with value to be the agreed result.
+        Object[] results = new Object[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
+        }
+        Object result = ifResultsAgree(results);
+        if (result == null) {
+          // if the result is not boolean and not all partition agree on the
+          // result, we don't remove the condition. Potentially, it can miss
+          // the case like "where ds % 3 == 1 or ds % 3 == 2"
+          // TODO: handle this case by making result vector to handle all
+          // constant values.
+          return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+        }
+        return new NodeInfoWrapper(WalkState.CONSTANT, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), result));
+      }
+
+      // Try to fold, otherwise return the expression itself
+      final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
+      final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
+      if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
+        ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
+        if (Boolean.TRUE.equals(constant.getValue())) {
+          return new NodeInfoWrapper(WalkState.TRUE, null, constant);
+        } else if (Boolean.FALSE.equals(constant.getValue())) {
+          return new NodeInfoWrapper(WalkState.FALSE, null, constant);
         } else {
-          return new NodeInfoWrapper(wrapper.state, null,
-              getOutExpr(fd, nodeOutputs));
+          return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
         }
-      } else if (FunctionRegistry.isOpAnd(fd)) {
-        boolean anyUnknown = false; // Whether any of the node outputs is unknown
-        boolean allDivided = true; // Whether all of the node outputs are divided
-        List newNodeOutputsList =
-            new ArrayList(nodeOutputs.length);
-        for (int i = 0; i < nodeOutputs.length; i++) {
-          NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
-          if (c.state == WalkState.FALSE) {
-            return c;
-          }
-          if (c.state == WalkState.UNKNOWN) {
-            anyUnknown = true;
-          }
-          if (c.state != WalkState.DIVIDED) {
-            allDivided = false;
-          }
-          if (c.state != WalkState.TRUE) {
-            newNodeOutputsList.add(c);
-          }
+      }
+      return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+    }
+
+    private Boolean checkForPartColsAndUnknown(ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean has_part_col = false;
+      for (Object child : nodeOutputs) {
+        NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+        if (wrapper.state == WalkState.UNKNOWN) {
+          return null;
+        } else if (wrapper.state == WalkState.PART_COL
+            || wrapper.state == WalkState.PART_COL_STRUCT) {
+          has_part_col = true;
         }
-        // If all of them were true, return true
-        if (newNodeOutputsList.size() == 0) {
-          return new NodeInfoWrapper(WalkState.TRUE, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+      }
+      return has_part_col;
+    }
+
+    private Object handleUdfOr(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean anyUnknown = false; // Whether any of the node outputs is unknown
+      boolean allDivided = true; // Whether all of the node outputs are divided
+      List newNodeOutputsList =
+          new ArrayList(nodeOutputs.length);
+      for (int i = 0; i< nodeOutputs.length; i++) {
+        NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+        if (c.state == WalkState.TRUE) {
+          return c;
         }
-        // If we are left with a single child, return the child
-        if (newNodeOutputsList.size() == 1) {
-          return newNodeOutputsList.get(0);
+        if (c.state == WalkState.UNKNOWN) {
+          anyUnknown = true;
         }
-        Object[] newNodeOutputs = newNodeOutputsList.toArray();
-        if (anyUnknown) {
-          return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+        if (c.state != WalkState.DIVIDED) {
+          allDivided = false;
         }
-        if (allDivided) {
-          Boolean[] results = new Boolean[ctx.getPartList().size()];
-          for (int i = 0; i < ctx.getPartList().size(); i++) {
-            Boolean[] andArray = new Boolean[newNodeOutputs.length];
-            for (int j = 0; j < newNodeOutputs.length; j++) {
-              andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
-            }
-            results[i] = opAnd(andArray);
-          }
-          return getResultWrapFromResults(results, fd, newNodeOutputs);
+        if (c.state != WalkState.FALSE) {
+          newNodeOutputsList.add(c);
         }
+      }
+      // If all of them were false, return false
+      if (newNodeOutputsList.size() == 0) {
+        return new NodeInfoWrapper(WalkState.FALSE, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+      }
+      // If we are left with a single child, return the child
+      if (newNodeOutputsList.size() == 1) {
+        return newNodeOutputsList.get(0);
+      }
+      Object[] newNodeOutputs = newNodeOutputsList.toArray();
+      if (anyUnknown) {
        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
-      } else if (FunctionRegistry.isOpOr(fd)) {
-        boolean anyUnknown = false; // Whether any of the node outputs is unknown
-        boolean allDivided = true; // Whether all of the node outputs are divided
-        List newNodeOutputsList =
-            new ArrayList(nodeOutputs.length);
-        for (int i = 0; i< nodeOutputs.length; i++) {
-          NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
-          if (c.state == WalkState.TRUE) {
-            return c;
-          }
-          if (c.state == WalkState.UNKNOWN) {
-            anyUnknown = true;
-          }
-          if (c.state != WalkState.DIVIDED) {
-            allDivided = false;
-          }
-          if (c.state != WalkState.FALSE) {
-            newNodeOutputsList.add(c);
+      }
+      if (allDivided) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          Boolean[] orArray = new Boolean[newNodeOutputs.length];
+          for (int j = 0; j < newNodeOutputs.length; j++) {
+            orArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
          }
+          results[i] = opOr(orArray);
        }
-        // If all of them were false, return false
-        if (newNodeOutputsList.size() == 0) {
-          return new NodeInfoWrapper(WalkState.FALSE, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+        return getResultWrapFromResults(results, fd, newNodeOutputs);
+      }
+      return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+    }
+
+    private Object handleUdfAnd(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean anyUnknown = false; // Whether any of the node outputs is unknown
+      boolean allDivided = true; // Whether all of the node outputs are divided
+      List newNodeOutputsList =
+          new ArrayList(nodeOutputs.length);
+      for (int i = 0; i < nodeOutputs.length; i++) {
+        NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+        if (c.state == WalkState.FALSE) {
+          return c;
        }
+        if (c.state == WalkState.UNKNOWN) {
+          anyUnknown = true;
        }
+        if (c.state != WalkState.DIVIDED) {
+          allDivided = false;
        }
+        if (c.state != WalkState.TRUE) {
+          newNodeOutputsList.add(c);
        }
+      }
+      // If all of them were true, return true
+      if (newNodeOutputsList.size() == 0) {
+        return new NodeInfoWrapper(WalkState.TRUE, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+      }
+      // If we are left with a single child, return the child
+      if (newNodeOutputsList.size() == 1) {
+        return newNodeOutputsList.get(0);
+      }
+      Object[] newNodeOutputs = newNodeOutputsList.toArray();
+      if (anyUnknown) {
        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
-      } else if (FunctionRegistry.isIn(fd)) {
-        List children = fd.getChildren();
-        boolean removePredElem = false;
-        ExprNodeDesc lhs = children.get(0);
-
-        if (lhs instanceof ExprNodeColumnDesc) {
-          // It is an IN clause on a column
-          if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
-            // It is a partition column, we can proceed
-            removePredElem = true;
-          }
-          if (removePredElem) {
-            // We should not remove the dynamic partition pruner generated synthetic predicates.
-            for (int i = 1; i < children.size(); i++) {
-              if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                removePredElem = false;
-                break;
-              }
-            }
-          }
-        } else if (lhs instanceof ExprNodeGenericFuncDesc) {
-          // It is an IN clause on a struct
-          // Make sure that the generic udf is deterministic
-          if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
-              .getGenericUDF())) {
-            boolean hasOnlyPartCols = true;
-            boolean hasDynamicListDesc = false;
-
-            for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
-              // Check if the current field expression contains only
-              // partition column or a virtual column or constants.
-              // If yes, this filter predicate is a candidate for this optimization.
-              if (!(ed instanceof ExprNodeColumnDesc &&
-                  ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
-                hasOnlyPartCols = false;
-                break;
-              }
-            }
-
-            // If we have non-partition columns, we cannot remove the predicate.
-            if (hasOnlyPartCols) {
-              // We should not remove the dynamic partition pruner generated synthetic predicates.
-              for (int i = 1; i < children.size(); i++) {
-                if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                  hasDynamicListDesc = true;
-                  break;
-                }
-              }
-            }
-
-            removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
+      }
+      if (allDivided) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          Boolean[] andArray = new Boolean[newNodeOutputs.length];
+          for (int j = 0; j < newNodeOutputs.length; j++) {
+            andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
          }
+          results[i] = opAnd(andArray);
        }
+        return getResultWrapFromResults(results, fd, newNodeOutputs);
+      }
+      return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+    }
-
-        // If removePredElem is set to true, return true as this is a potential candidate
-        // for partition condition remover. Else, set the WalkState for this node to unknown.
-        return removePredElem ?
-            new NodeInfoWrapper(WalkState.TRUE, null,
-                new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
-            new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
-      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
-        // If it's a non-deterministic UDF, set unknown to true
-        return new NodeInfoWrapper(WalkState.UNKNOWN, null,
+    private Object handleUdfNot(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      assert (nodeOutputs.length == 1);
+      NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
+      if (wrapper.state == WalkState.TRUE) {
+        ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
+            wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
+        return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
+      } else if (wrapper.state == WalkState.FALSE) {
+        ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
+            wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
+        return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
+      } else if (wrapper.state == WalkState.DIVIDED) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          results[i] = opNot(wrapper.ResultVector[i]);
+        }
+        return new NodeInfoWrapper(WalkState.DIVIDED, results, getOutExpr(fd, nodeOutputs));
      } else {
-        // If any child is unknown, set unknown to true
-        boolean has_part_col = false;
-        for (Object child : nodeOutputs) {
-          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
-          if (wrapper.state == WalkState.UNKNOWN) {
-            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
-          } else if (wrapper.state == WalkState.PART_COL) {
-            has_part_col = true;
-          }
-        }
-
-        if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
-          // we need to evaluate result for every pruned partition
-          if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
-            // if the return type of the GenericUDF is boolean and all partitions agree on
-            // a result, we update the state of the node to be TRUE of FALSE
-            Boolean[] results = new Boolean[ctx.getPartList().size()];
-            for (int i = 0; i < ctx.getPartList().size(); i++) {
-              results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
-                  ctx.getVirtualColumns());
-            }
-            return getResultWrapFromResults(results, fd, nodeOutputs);
-          }
-
-          // the case that return type of the GenericUDF is not boolean, and if not all partition
-          // agree on result, we make the node UNKNOWN. If they all agree, we replace the node
-          // to be a CONSTANT node with value to be the agreed result.
-          Object[] results = new Object[ctx.getPartList().size()];
-          for (int i = 0; i < ctx.getPartList().size(); i++) {
-            results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
-          }
-          Object result = ifResultsAgree(results);
-          if (result == null) {
-            // if the result is not boolean and not all partition agree on the
-            // result, we don't remove the condition. Potentially, it can miss
-            // the case like "where ds % 3 == 1 or ds % 3 == 2"
-            // TODO: handle this case by making result vector to handle all
-            // constant values.
-            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
-          }
-          return new NodeInfoWrapper(WalkState.CONSTANT, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), result));
-        }
-
-        // Try to fold, otherwise return the expression itself
-        final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
-        final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
-        if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
-          ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
-          if (Boolean.TRUE.equals(constant.getValue())) {
-            return new NodeInfoWrapper(WalkState.TRUE, null, constant);
-          } else if (Boolean.FALSE.equals(constant.getValue())) {
-            return new NodeInfoWrapper(WalkState.FALSE, null, constant);
-          } else {
-            return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
-          }
-        }
-        return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+        return new NodeInfoWrapper(wrapper.state, null,
+            getOutExpr(fd, nodeOutputs));
      }
    }
  };

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b2125ca..4353d3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -27,6 +29,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.QueryState;
@@ -620,4 +623,26 @@ public class ParseContext {
       List columnStatsAutoGatherContexts) {
     this.columnStatsAutoGatherContexts = columnStatsAutoGatherContexts;
   }
+
+  public Collection getAllOps() {
+    List ops = new ArrayList<>();
+    Set visited = new HashSet();
+    for (Operator op : getTopOps().values()) {
+      getAllOps(ops, visited, op);
+    }
+    return ops;
+  }
+
+  private static void getAllOps(List builder, Set visited, Operator op) {
+    boolean added = visited.add(op);
+    builder.add(op);
+    if (!added) return;
+    if (op.getNumChild() > 0) {
+      List<Operator<? extends OperatorDesc>> children = op.getChildOperators();
+      for (int i = 0; i < children.size(); i++) {
+        getAllOps(builder, visited, children.get(i));
+      }
+    }
+  }
+
 }
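
For context, the new ParseContext.getAllOps() helper added above walks the operator DAG starting from the top TableScan operators, using a visited set so that an operator reachable through several parents is returned only once and the recursion cannot loop. The following is only a hypothetical usage sketch, not part of this commit; the pctx variable and the FilterOperator filtering are made up for illustration, and the defensive cast keeps the sketch independent of the exact generic signature of getAllOps():

    import org.apache.hadoop.hive.ql.exec.FilterOperator;
    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.parse.ParseContext;

    public class AllOpsSketch {
      // Prints every FilterOperator reachable from the top operators of a compiled plan.
      static void dumpFilters(ParseContext pctx) {
        for (Object o : pctx.getAllOps()) {
          Operator<?> op = (Operator<?>) o;   // cast avoids depending on the declared element type
          if (op instanceof FilterOperator) {
            System.out.println("filter operator: " + op.getOperatorId());
          }
        }
      }
    }

The visited set is what makes the helper safe on plans where an operator has more than one parent (for example after joins or unions): the operator is added to the output once, and its children are only expanded on the first visit.
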
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/queries/clientpositive/partition_condition_remover.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partition_condition_remover.q b/ql/src/test/queries/clientpositive/partition_condition_remover.q
new file mode 100644
index 0000000..39e58b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_condition_remover.q
@@ -0,0 +1,13 @@
+
+drop table foo;
+
+create table foo (i int) partitioned by (s string);
+
+insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10;
+insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10;
+
+explain select * from foo where s not in ('bar');
+select * from foo where s not in ('bar');
+
+
+drop table foo;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/partition_condition_remover.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partition_condition_remover.q.out b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
new file mode 100644
index 0000000..2f8f998
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table foo (i int) partitioned by (s string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (i int) partitioned by (s string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=foo
+POSTHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=foo
+POSTHOOK: Lineage: foo PARTITION(s=foo).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=bar
+POSTHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=bar
+POSTHOOK: Lineage: foo PARTITION(s=bar).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: foo
+          Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: i (type: int), s (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index 8b99401..22e7fd2 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -1047,7 +1047,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
@@ -1072,7 +1072,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
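
A note on why the old code produced incorrect results for NOT IN on partition columns: in the removed isIn branch, an IN whose left-hand side was a partition column (and not a dynamic-pruning predicate) was marked WalkState.TRUE regardless of the actual partition values, and the enclosing NOT handler then folded that TRUE into a constant FALSE, so the partition condition remover reasoned about the wrong predicate and the query returned incorrect results (HIVE-14652). The rewritten code instead evaluates the IN against each candidate partition like any other deterministic UDF, producing a per-partition result vector (the DIVIDED state) that NOT negates element-wise. The sketch below is a plain-Java illustration of that per-partition folding; it is not the Hive API, and the partition values and IN list are made up:

    import java.util.Arrays;
    import java.util.List;

    // Illustration only: per-partition evaluation of IN and element-wise negation for NOT,
    // mirroring the DIVIDED result vector idea from PcrExprProcFactory.
    public class NotInFoldingSketch {

      // One entry per candidate partition: TRUE means the predicate holds for that partition.
      static Boolean[] evalInPerPartition(List<String> partitionValues, List<String> inList) {
        Boolean[] results = new Boolean[partitionValues.size()];
        for (int i = 0; i < partitionValues.size(); i++) {
          results[i] = inList.contains(partitionValues.get(i));
        }
        return results;
      }

      // NOT over a divided result negates each per-partition entry (null stays unknown).
      static Boolean[] negate(Boolean[] results) {
        Boolean[] out = new Boolean[results.length];
        for (int i = 0; i < results.length; i++) {
          out[i] = (results[i] == null) ? null : !results[i];
        }
        return out;
      }

      public static void main(String[] args) {
        List<String> partitions = Arrays.asList("foo", "bar");              // s=foo, s=bar
        Boolean[] in = evalInPerPartition(partitions, Arrays.asList("bar")); // [false, true]
        Boolean[] notIn = negate(in);                                        // [true, false]
        // Only the s=bar partition is pruned; s=foo survives, matching the q.out above.
        System.out.println(Arrays.toString(notIn));
      }
    }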