From: sershe@apache.org
To: commits@hive.apache.org
Date: Wed, 31 Aug 2016 22:43:20 -0000
Message-Id: <808cb97f967c4e50a20ae7a68c279548@git.apache.org>
Subject: [2/2] hive git commit: HIVE-14652 : incorrect results for not in on partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)

HIVE-14652 : incorrect results for not in on partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9b39504
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9b39504
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9b39504

Branch: refs/heads/branch-2.1
Commit: b9b395046f11f427fad0a6f5f7b50bb6db6e354e
Parents: 8a25c54
Author: Sergey Shelukhin
Authored: Wed Aug 31 15:36:47 2016 -0700
Committer: Sergey Shelukhin
Committed: Wed Aug 31 15:42:50 2016 -0700

----------------------------------------------------------------------
 .../ql/optimizer/pcr/PcrExprProcFactory.java    | 423 +++++++++----------
 .../hadoop/hive/ql/parse/ParseContext.java      |  25 ++
 .../partition_condition_remover.q               |  13 +
 .../partition_condition_remover.q.out           |  79 ++++
 ql/src/test/results/clientpositive/pcs.q.out    |   4 +-
 5 files changed, 330 insertions(+), 214 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index f9388e2..461dbe5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.optimizer.pcr;
 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -185,7 +187,7 @@ public final class PcrExprProcFactory {
   }
 
   public enum WalkState {
-    PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED
+    PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED, PART_COL_STRUCT
   }
 
   public static class NodeInfoWrapper {
@@ -253,242 +255,239 @@ public final class PcrExprProcFactory {
         Object... nodeOutputs) throws SemanticException {
       PcrExprProcCtx ctx = (PcrExprProcCtx) procCtx;
       ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+      if (LOG.isDebugEnabled()) {
+        String err = "Processing " + fd.getExprString() + " "
+            + fd.getGenericUDF().getUdfName() + " outputs ";
+        for (Object child : nodeOutputs) {
+          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+          err += "{" + wrapper.state + ", " + wrapper.outExpr + "}, ";
+        }
+        LOG.debug(err);
+      }
       if (FunctionRegistry.isOpNot(fd)) {
-        assert (nodeOutputs.length == 1);
-        NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
-        if (wrapper.state == WalkState.TRUE) {
-          ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
-              wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
-          return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
-        } else if (wrapper.state == WalkState.FALSE) {
-          ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
-              wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
-          return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
-        } else if (wrapper.state == WalkState.DIVIDED) {
+        return handleUdfNot(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isOpAnd(fd)) {
+        return handleUdfAnd(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isOpOr(fd)) {
+        return handleUdfOr(ctx, fd, nodeOutputs);
+      } else if (FunctionRegistry.isIn(fd)) {
+        List children = fd.getChildren();
+        // We should not remove the dynamic partition pruner generated synthetic predicates.
+        for (int i = 1; i < children.size(); i++) {
+          if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+          }
+        }
+        // Otherwise, handle like a normal generic UDF.
+        return handleDeterministicUdf(ctx, fd, nodeOutputs);
+      } else if (fd.getGenericUDF() instanceof GenericUDFStruct) {
+        // Handle structs composed of partition columns,
+        for (Object child : nodeOutputs) {
+          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+          if (wrapper.state != WalkState.PART_COL) {
+            return handleDeterministicUdf(ctx, fd, nodeOutputs); // Giving up.
+          }
+        }
+        return new NodeInfoWrapper(WalkState.PART_COL_STRUCT, null, getOutExpr(fd, nodeOutputs));
+      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+        // If it's a non-deterministic UDF, set unknown to true
+        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+      } else {
+        return handleDeterministicUdf(ctx, fd, nodeOutputs);
+      }
+    }
+
+    private Object handleDeterministicUdf(PcrExprProcCtx ctx,
+        ExprNodeGenericFuncDesc fd, Object... nodeOutputs)
+        throws SemanticException {
+      Boolean has_part_col = checkForPartColsAndUnknown(fd, nodeOutputs);
+      if (has_part_col == null) {
+        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+      }
+
+      if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
+        // we need to evaluate result for every pruned partition
+        if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
+          // if the return type of the GenericUDF is boolean and all partitions agree on
+          // a result, we update the state of the node to be TRUE of FALSE
           Boolean[] results = new Boolean[ctx.getPartList().size()];
           for (int i = 0; i < ctx.getPartList().size(); i++) {
-            results[i] = opNot(wrapper.ResultVector[i]);
+            results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
+                ctx.getVirtualColumns());
           }
-          return new NodeInfoWrapper(WalkState.DIVIDED, results,
-              getOutExpr(fd, nodeOutputs));
+          return getResultWrapFromResults(results, fd, nodeOutputs);
+        }
+
+        // the case that return type of the GenericUDF is not boolean, and if not all partition
+        // agree on result, we make the node UNKNOWN. If they all agree, we replace the node
+        // to be a CONSTANT node with value to be the agreed result.
+        Object[] results = new Object[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
+        }
+        Object result = ifResultsAgree(results);
+        if (result == null) {
+          // if the result is not boolean and not all partition agree on the
+          // result, we don't remove the condition. Potentially, it can miss
+          // the case like "where ds % 3 == 1 or ds % 3 == 2"
+          // TODO: handle this case by making result vector to handle all
+          // constant values.
+          return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+        }
+        return new NodeInfoWrapper(WalkState.CONSTANT, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), result));
+      }
+
+      // Try to fold, otherwise return the expression itself
+      final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
+      final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
+      if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
+        ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
+        if (Boolean.TRUE.equals(constant.getValue())) {
+          return new NodeInfoWrapper(WalkState.TRUE, null, constant);
+        } else if (Boolean.FALSE.equals(constant.getValue())) {
+          return new NodeInfoWrapper(WalkState.FALSE, null, constant);
         } else {
-          return new NodeInfoWrapper(wrapper.state, null,
-              getOutExpr(fd, nodeOutputs));
+          return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
         }
-      } else if (FunctionRegistry.isOpAnd(fd)) {
-        boolean anyUnknown = false; // Whether any of the node outputs is unknown
-        boolean allDivided = true; // Whether all of the node outputs are divided
-        List newNodeOutputsList =
-            new ArrayList(nodeOutputs.length);
-        for (int i = 0; i < nodeOutputs.length; i++) {
-          NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
-          if (c.state == WalkState.FALSE) {
-            return c;
-          }
-          if (c.state == WalkState.UNKNOWN) {
-            anyUnknown = true;
-          }
-          if (c.state != WalkState.DIVIDED) {
-            allDivided = false;
-          }
-          if (c.state != WalkState.TRUE) {
-            newNodeOutputsList.add(c);
-          }
+      }
+      return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+    }
+
+    private Boolean checkForPartColsAndUnknown(ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean has_part_col = false;
+      for (Object child : nodeOutputs) {
+        NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+        if (wrapper.state == WalkState.UNKNOWN) {
+          return null;
+        } else if (wrapper.state == WalkState.PART_COL
+            || wrapper.state == WalkState.PART_COL_STRUCT) {
+          has_part_col = true;
         }
-        // If all of them were true, return true
-        if (newNodeOutputsList.size() == 0) {
-          return new NodeInfoWrapper(WalkState.TRUE, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+      }
+      return has_part_col;
+    }
+
+    private Object handleUdfOr(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean anyUnknown = false; // Whether any of the node outputs is unknown
+      boolean allDivided = true; // Whether all of the node outputs are divided
+      List newNodeOutputsList =
+          new ArrayList(nodeOutputs.length);
+      for (int i = 0; i< nodeOutputs.length; i++) {
+        NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+        if (c.state == WalkState.TRUE) {
+          return c;
         }
-        // If we are left with a single child, return the child
-        if (newNodeOutputsList.size() == 1) {
-          return newNodeOutputsList.get(0);
+        if (c.state == WalkState.UNKNOWN) {
+          anyUnknown = true;
         }
-        Object[] newNodeOutputs = newNodeOutputsList.toArray();
-        if (anyUnknown) {
-          return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+        if (c.state != WalkState.DIVIDED) {
+          allDivided = false;
         }
-        if (allDivided) {
-          Boolean[] results = new Boolean[ctx.getPartList().size()];
-          for (int i = 0; i < ctx.getPartList().size(); i++) {
-            Boolean[] andArray = new Boolean[newNodeOutputs.length];
-            for (int j = 0; j < newNodeOutputs.length; j++) {
-              andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
-            }
-            results[i] = opAnd(andArray);
-          }
-          return getResultWrapFromResults(results, fd, newNodeOutputs);
+        if (c.state != WalkState.FALSE) {
+          newNodeOutputsList.add(c);
         }
+      }
+      // If all of them were false, return false
+      if (newNodeOutputsList.size() == 0) {
+        return new NodeInfoWrapper(WalkState.FALSE, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+      }
+      // If we are left with a single child, return the child
+      if (newNodeOutputsList.size() == 1) {
+        return newNodeOutputsList.get(0);
+      }
+      Object[] newNodeOutputs = newNodeOutputsList.toArray();
+      if (anyUnknown) {
        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
-      } else if (FunctionRegistry.isOpOr(fd)) {
-        boolean anyUnknown = false; // Whether any of the node outputs is unknown
-        boolean allDivided = true; // Whether all of the node outputs are divided
-        List newNodeOutputsList =
-            new ArrayList(nodeOutputs.length);
-        for (int i = 0; i< nodeOutputs.length; i++) {
-          NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
-          if (c.state == WalkState.TRUE) {
-            return c;
-          }
-          if (c.state == WalkState.UNKNOWN) {
-            anyUnknown = true;
-          }
-          if (c.state != WalkState.DIVIDED) {
-            allDivided = false;
-          }
-          if (c.state != WalkState.FALSE) {
-            newNodeOutputsList.add(c);
+      }
+      if (allDivided) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          Boolean[] orArray = new Boolean[newNodeOutputs.length];
+          for (int j = 0; j < newNodeOutputs.length; j++) {
+            orArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
          }
+          results[i] = opOr(orArray);
        }
-        // If all of them were false, return false
-        if (newNodeOutputsList.size() == 0) {
-          return new NodeInfoWrapper(WalkState.FALSE, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+        return getResultWrapFromResults(results, fd, newNodeOutputs);
+      }
+      return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+    }
+
+    private Object handleUdfAnd(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      boolean anyUnknown = false; // Whether any of the node outputs is unknown
+      boolean allDivided = true; // Whether all of the node outputs are divided
+      List newNodeOutputsList =
+          new ArrayList(nodeOutputs.length);
+      for (int i = 0; i < nodeOutputs.length; i++) {
+        NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+        if (c.state == WalkState.FALSE) {
+          return c;
        }
+        if (c.state == WalkState.UNKNOWN) {
+          anyUnknown = true;
        }
+        if (c.state != WalkState.DIVIDED) {
+          allDivided = false;
        }
+        if (c.state != WalkState.TRUE) {
+          newNodeOutputsList.add(c);
        }
+      }
+      // If all of them were true, return true
+      if (newNodeOutputsList.size() == 0) {
+        return new NodeInfoWrapper(WalkState.TRUE, null,
+            new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+      }
+      // If we are left with a single child, return the child
+      if (newNodeOutputsList.size() == 1) {
+        return newNodeOutputsList.get(0);
+      }
+      Object[] newNodeOutputs = newNodeOutputsList.toArray();
+      if (anyUnknown) {
        return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
-      } else if (FunctionRegistry.isIn(fd)) {
-        List children = fd.getChildren();
-        boolean removePredElem = false;
-        ExprNodeDesc lhs = children.get(0);
-
-        if (lhs instanceof ExprNodeColumnDesc) {
-          // It is an IN clause on a column
-          if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
-            // It is a partition column, we can proceed
-            removePredElem = true;
-          }
-          if (removePredElem) {
-            // We should not remove the dynamic partition pruner generated synthetic predicates.
-            for (int i = 1; i < children.size(); i++) {
-              if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                removePredElem = false;
-                break;
-              }
-            }
-          }
-        } else if (lhs instanceof ExprNodeGenericFuncDesc) {
-          // It is an IN clause on a struct
-          // Make sure that the generic udf is deterministic
-          if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
-              .getGenericUDF())) {
-            boolean hasOnlyPartCols = true;
-            boolean hasDynamicListDesc = false;
-
-            for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
-              // Check if the current field expression contains only
-              // partition column or a virtual column or constants.
-              // If yes, this filter predicate is a candidate for this optimization.
-              if (!(ed instanceof ExprNodeColumnDesc &&
-                  ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
-                hasOnlyPartCols = false;
-                break;
-              }
-            }
-
-            // If we have non-partition columns, we cannot remove the predicate.
-            if (hasOnlyPartCols) {
-              // We should not remove the dynamic partition pruner generated synthetic predicates.
-              for (int i = 1; i < children.size(); i++) {
-                if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                  hasDynamicListDesc = true;
-                  break;
-                }
-              }
-            }
-
-            removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
+      }
+      if (allDivided) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          Boolean[] andArray = new Boolean[newNodeOutputs.length];
+          for (int j = 0; j < newNodeOutputs.length; j++) {
+            andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
          }
+          results[i] = opAnd(andArray);
        }
+        return getResultWrapFromResults(results, fd, newNodeOutputs);
+      }
+      return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+    }
-
-        // If removePredElem is set to true, return true as this is a potential candidate
-        // for partition condition remover. Else, set the WalkState for this node to unknown.
-        return removePredElem ?
-            new NodeInfoWrapper(WalkState.TRUE, null,
-                new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
-            new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
-      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
-        // If it's a non-deterministic UDF, set unknown to true
-        return new NodeInfoWrapper(WalkState.UNKNOWN, null,
+    private Object handleUdfNot(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+        Object... nodeOutputs) {
+      assert (nodeOutputs.length == 1);
+      NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
+      if (wrapper.state == WalkState.TRUE) {
+        ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
+            wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
+        return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
+      } else if (wrapper.state == WalkState.FALSE) {
+        ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
+            wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
+        return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
+      } else if (wrapper.state == WalkState.DIVIDED) {
+        Boolean[] results = new Boolean[ctx.getPartList().size()];
+        for (int i = 0; i < ctx.getPartList().size(); i++) {
+          results[i] = opNot(wrapper.ResultVector[i]);
+        }
+        return new NodeInfoWrapper(WalkState.DIVIDED, results, getOutExpr(fd, nodeOutputs));
      } else {
-        // If any child is unknown, set unknown to true
-        boolean has_part_col = false;
-        for (Object child : nodeOutputs) {
-          NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
-          if (wrapper.state == WalkState.UNKNOWN) {
-            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
-          } else if (wrapper.state == WalkState.PART_COL) {
-            has_part_col = true;
-          }
-        }
-
-        if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
-          // we need to evaluate result for every pruned partition
-          if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
-            // if the return type of the GenericUDF is boolean and all partitions agree on
-            // a result, we update the state of the node to be TRUE of FALSE
-            Boolean[] results = new Boolean[ctx.getPartList().size()];
-            for (int i = 0; i < ctx.getPartList().size(); i++) {
-              results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
-                  ctx.getVirtualColumns());
-            }
-            return getResultWrapFromResults(results, fd, nodeOutputs);
-          }
-
-          // the case that return type of the GenericUDF is not boolean, and if not all partition
-          // agree on result, we make the node UNKNOWN. If they all agree, we replace the node
-          // to be a CONSTANT node with value to be the agreed result.
-          Object[] results = new Object[ctx.getPartList().size()];
-          for (int i = 0; i < ctx.getPartList().size(); i++) {
-            results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
-          }
-          Object result = ifResultsAgree(results);
-          if (result == null) {
-            // if the result is not boolean and not all partition agree on the
-            // result, we don't remove the condition. Potentially, it can miss
-            // the case like "where ds % 3 == 1 or ds % 3 == 2"
-            // TODO: handle this case by making result vector to handle all
-            // constant values.
-            return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
-          }
-          return new NodeInfoWrapper(WalkState.CONSTANT, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), result));
-        }
-
-        // Try to fold, otherwise return the expression itself
-        final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
-        final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
-        if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
-          ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
-          if (Boolean.TRUE.equals(constant.getValue())) {
-            return new NodeInfoWrapper(WalkState.TRUE, null, constant);
-          } else if (Boolean.FALSE.equals(constant.getValue())) {
-            return new NodeInfoWrapper(WalkState.FALSE, null, constant);
-          } else {
-            return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
-          }
-        }
-        return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+        return new NodeInfoWrapper(wrapper.state, null,
+            getOutExpr(fd, nodeOutputs));
      }
    }
  };

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b2125ca..4353d3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -27,6 +29,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.QueryState;
@@ -620,4 +623,26 @@ public class ParseContext {
       List columnStatsAutoGatherContexts) {
     this.columnStatsAutoGatherContexts = columnStatsAutoGatherContexts;
   }
+
+  public Collection getAllOps() {
+    List ops = new ArrayList<>();
+    Set visited = new HashSet();
+    for (Operator op : getTopOps().values()) {
+      getAllOps(ops, visited, op);
+    }
+    return ops;
+  }
+
+  private static void getAllOps(List builder, Set visited, Operator op) {
+    boolean added = visited.add(op);
+    builder.add(op);
+    if (!added) return;
+    if (op.getNumChild() > 0) {
+      List<Operator<? extends OperatorDesc>> children = op.getChildOperators();
+      for (int i = 0; i < children.size(); i++) {
+        getAllOps(builder, visited, children.get(i));
+      }
+    }
+  }
+
 }
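
For context, the new ParseContext.getAllOps() helper added above walks the operator DAG starting from the top TableScan operators, using a visited set so that an operator reachable through several parents is returned only once and the recursion cannot loop. The following is only a hypothetical usage sketch, not part of this commit; the pctx variable and the FilterOperator filtering are made up for illustration, and the defensive cast keeps the sketch independent of the exact generic signature of getAllOps():

    import org.apache.hadoop.hive.ql.exec.FilterOperator;
    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.parse.ParseContext;

    public class AllOpsSketch {
      // Prints every FilterOperator reachable from the top operators of a compiled plan.
      static void dumpFilters(ParseContext pctx) {
        for (Object o : pctx.getAllOps()) {
          Operator<?> op = (Operator<?>) o;   // cast avoids depending on the declared element type
          if (op instanceof FilterOperator) {
            System.out.println("filter operator: " + op.getOperatorId());
          }
        }
      }
    }

The visited set is what makes the helper safe on plans where an operator has more than one parent (for example after joins or unions): the operator is added to the output once, and its children are only expanded on the first visit.
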
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/queries/clientpositive/partition_condition_remover.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partition_condition_remover.q b/ql/src/test/queries/clientpositive/partition_condition_remover.q
new file mode 100644
index 0000000..39e58b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_condition_remover.q
@@ -0,0 +1,13 @@
+
+drop table foo;
+
+create table foo (i int) partitioned by (s string);
+
+insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10;
+insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10;
+
+explain select * from foo where s not in ('bar');
+select * from foo where s not in ('bar');
+
+
+drop table foo;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/partition_condition_remover.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partition_condition_remover.q.out b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
new file mode 100644
index 0000000..2f8f998
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table foo (i int) partitioned by (s string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (i int) partitioned by (s string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=foo
+POSTHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=foo
+POSTHOOK: Lineage: foo PARTITION(s=foo).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=bar
+POSTHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=bar
+POSTHOOK: Lineage: foo PARTITION(s=bar).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: foo
+          Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: i (type: int), s (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+528534767	foo
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo

http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index 8b99401..22e7fd2 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -1047,7 +1047,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
@@ -1072,7 +1072,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+              predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
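
A note on why the old code produced incorrect results for NOT IN on partition columns: in the removed isIn branch, an IN whose left-hand side was a partition column (and not a dynamic-pruning predicate) was marked WalkState.TRUE regardless of the actual partition values, and the enclosing NOT handler then folded that TRUE into a constant FALSE, so the partition condition remover reasoned about the wrong predicate and the query returned incorrect results (HIVE-14652). The rewritten code instead evaluates the IN against each candidate partition like any other deterministic UDF, producing a per-partition result vector (the DIVIDED state) that NOT negates element-wise. The sketch below is a plain-Java illustration of that per-partition folding; it is not the Hive API, and the partition values and IN list are made up:

    import java.util.Arrays;
    import java.util.List;

    // Illustration only: per-partition evaluation of IN and element-wise negation for NOT,
    // mirroring the DIVIDED result vector idea from PcrExprProcFactory.
    public class NotInFoldingSketch {

      // One entry per candidate partition: TRUE means the predicate holds for that partition.
      static Boolean[] evalInPerPartition(List<String> partitionValues, List<String> inList) {
        Boolean[] results = new Boolean[partitionValues.size()];
        for (int i = 0; i < partitionValues.size(); i++) {
          results[i] = inList.contains(partitionValues.get(i));
        }
        return results;
      }

      // NOT over a divided result negates each per-partition entry (null stays unknown).
      static Boolean[] negate(Boolean[] results) {
        Boolean[] out = new Boolean[results.length];
        for (int i = 0; i < results.length; i++) {
          out[i] = (results[i] == null) ? null : !results[i];
        }
        return out;
      }

      public static void main(String[] args) {
        List<String> partitions = Arrays.asList("foo", "bar");              // s=foo, s=bar
        Boolean[] in = evalInPerPartition(partitions, Arrays.asList("bar")); // [false, true]
        Boolean[] notIn = negate(in);                                        // [true, false]
        // Only the s=bar partition is pruned; s=foo survives, matching the q.out above.
        System.out.println(Arrays.toString(notIn));
      }
    }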