Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B380B1880C for ; Thu, 6 Aug 2015 00:50:08 +0000 (UTC) Received: (qmail 47203 invoked by uid 500); 6 Aug 2015 00:50:07 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 47001 invoked by uid 500); 6 Aug 2015 00:50:07 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 46579 invoked by uid 99); 6 Aug 2015 00:50:07 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 06 Aug 2015 00:50:07 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 65E90E050A; Thu, 6 Aug 2015 00:50:07 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Date: Thu, 06 Aug 2015 00:50:11 -0000 Message-Id: In-Reply-To: <8f9d1818a5f44addab68cb4879fb5a27@git.apache.org> References: <8f9d1818a5f44addab68cb4879fb5a27@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [05/53] [abbrv] hive git commit: HIVE-10799. Refactor the SearchArgumentFactory to remove the AST-specific factory. (omalley reviewed by prasanth_j) HIVE-10799. Refactor the SearchArgumentFactory to remove the AST-specific factory. (omalley reviewed by prasanth_j) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c178a6e9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c178a6e9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c178a6e9 Branch: refs/heads/llap Commit: c178a6e9d12055e5bde634123ca58f243ae39477 Parents: d2ee354 Author: Owen O'Malley Authored: Tue Jul 28 12:47:59 2015 -0700 Committer: Owen O'Malley Committed: Tue Jul 28 12:47:59 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/common/type/HiveDecimal.java | 2 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +- .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 30 +- .../read/ParquetRecordReaderWrapper.java | 7 +- .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 439 +++ .../hive/ql/io/sarg/SearchArgumentFactory.java | 25 +- .../hive/ql/io/sarg/SearchArgumentImpl.java | 1000 ++---- .../hive/ql/io/orc/TestInputOutputFormat.java | 9 +- .../hadoop/hive/ql/io/orc/TestOrcFile.java | 11 +- .../hive/ql/io/orc/TestRecordReaderImpl.java | 63 +- .../parquet/TestParquetRecordReaderWrapper.java | 155 + .../ql/io/sarg/TestConvertAstToSearchArg.java | 2863 +++++++++++++++++ .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 2888 +----------------- .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 33 +- .../hadoop/hive/ql/io/sarg/SearchArgument.java | 34 +- .../hive/serde2/io/HiveDecimalWritable.java | 8 + 16 files changed, 4080 insertions(+), 3491 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java index a8215f2..f14fc2d 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java @@ -75,7 +75,7 @@ public class HiveDecimal implements Comparable { public static HiveDecimal create(String dec) { BigDecimal bd; try { - bd = new BigDecimal(dec); + bd = new BigDecimal(dec.trim()); } catch (NumberFormatException ex) { return null; } http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 3a9e64e..4e6dd7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -54,10 +54,10 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -305,7 +305,7 @@ public class OrcInputFormat implements InputFormat, options.searchArgument(null, null); return; } - SearchArgument sarg = SearchArgumentFactory.createFromConf(conf); + SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); if (sarg == null) { LOG.debug("No ORC pushdown predicate"); options.searchArgument(null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 4f79e37..f85420d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -47,8 +47,8 @@ import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReader; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; import org.apache.hadoop.io.Text; @@ -523,7 +523,8 @@ class RecordReaderImpl implements RecordReader { result = TruthValue.YES_NO_NULL; } } else if (predObj instanceof String || predObj instanceof Text || - predObj instanceof HiveDecimal || predObj instanceof BigDecimal) { + predObj instanceof HiveDecimalWritable || + predObj instanceof BigDecimal) { if (bf.testString(predObj.toString())) { result = TruthValue.YES_NO_NULL; } @@ -560,11 +561,7 @@ class RecordReaderImpl implements RecordReader { } private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) { - if (obj != null) { - if (obj instanceof ExprNodeConstantDesc) { - obj = ((ExprNodeConstantDesc) obj).getValue(); - } - } else { + if (obj == null) { return null; } switch (type) { @@ -588,20 +585,23 @@ class RecordReaderImpl implements RecordReader { break; case DECIMAL: if (obj instanceof Boolean) { - return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO; + return new HiveDecimalWritable(((Boolean) obj).booleanValue() ? + HiveDecimal.ONE : HiveDecimal.ZERO); } else if (obj instanceof Integer) { - return HiveDecimal.create(((Integer) obj).intValue()); + return new HiveDecimalWritable(((Integer) obj).intValue()); } else if (obj instanceof Long) { - return HiveDecimal.create(((Long) obj)); + return new HiveDecimalWritable(((Long) obj)); } else if (obj instanceof Float || obj instanceof Double || obj instanceof String) { - return HiveDecimal.create(obj.toString()); + return new HiveDecimalWritable(obj.toString()); } else if (obj instanceof BigDecimal) { - return HiveDecimal.create((BigDecimal) obj); + return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) obj)); } else if (obj instanceof HiveDecimal) { + return new HiveDecimalWritable((HiveDecimal) obj); + } else if (obj instanceof HiveDecimalWritable) { return obj; } else if (obj instanceof Timestamp) { - return HiveDecimal.create( + return new HiveDecimalWritable( new Double(new TimestampWritable((Timestamp) obj).getDouble()).toString()); } break; @@ -641,12 +641,16 @@ class RecordReaderImpl implements RecordReader { case TIMESTAMP: if (obj instanceof Timestamp) { return obj; + } else if (obj instanceof Integer) { + return TimestampWritable.longToTimestamp(((Number) obj).longValue(), false); } else if (obj instanceof Float) { return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue()); } else if (obj instanceof Double) { return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue()); } else if (obj instanceof HiveDecimal) { return TimestampWritable.decimalToTimestamp((HiveDecimal) obj); + } else if (obj instanceof HiveDecimalWritable) { + return TimestampWritable.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal()); } else if (obj instanceof Date) { return new Timestamp(((Date) obj).getTime()); } http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index a64ec06..49e52da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -27,10 +27,10 @@ import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; @@ -149,7 +149,7 @@ public class ParquetRecordReaderWrapper implements RecordReader children = expr.getChildren(); + if (variable < 0 || variable >= children.size()) { + return null; + } + ExprNodeDesc child = children.get(variable); + if (child instanceof ExprNodeColumnDesc) { + return ((ExprNodeColumnDesc) child).getColumn(); + } + return null; + } + + private static Object boxLiteral(ExprNodeConstantDesc constantDesc, + PredicateLeaf.Type type) { + Object lit = constantDesc.getValue(); + if (lit == null) { + return null; + } + switch (type) { + case INTEGER: + return ((Number) lit).intValue(); + case LONG: + return ((Number) lit).longValue(); + case STRING: + if (lit instanceof HiveChar) { + lit = ((HiveChar) lit).getPaddedValue(); + } else if (lit instanceof String) { + return lit; + } else { + return lit.toString(); + } + case FLOAT: + if (lit instanceof Float) { + // converting a float directly to a double causes annoying conversion + // problems + return Double.parseDouble(lit.toString()); + } else { + return ((Number) lit).doubleValue(); + } + case TIMESTAMP: + return Timestamp.valueOf(lit.toString()); + case DATE: + return Date.valueOf(lit.toString()); + case DECIMAL: + LOG.warn("boxing " + lit); + return new HiveDecimalWritable(lit.toString()); + case BOOLEAN: + return lit; + default: + throw new IllegalArgumentException("Unknown literal " + type); + } + } + + /** + * Find the child that is the literal. + * @param expr the parent node to check + * @param type the type of the expression + * @return the literal boxed if found or null + */ + private static Object findLiteral(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type) { + List children = expr.getChildren(); + if (children.size() != 2) { + return null; + } + Object result = null; + for(ExprNodeDesc child: children) { + if (child instanceof ExprNodeConstantDesc) { + if (result != null) { + return null; + } + result = boxLiteral((ExprNodeConstantDesc) child, type); + } + } + return result; + } + + /** + * Return the boxed literal at the given position + * @param expr the parent node + * @param type the type of the expression + * @param position the child position to check + * @return the boxed literal if found otherwise null + */ + private static Object getLiteral(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type, + int position) { + List children = expr.getChildren(); + Object child = children.get(position); + if (child instanceof ExprNodeConstantDesc) { + return boxLiteral((ExprNodeConstantDesc) child, type); + } + return null; + } + + private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type, + int start) { + List children = expr.getChildren(); + Object[] result = new Object[children.size() - start]; + + // ignore the first child, since it is the variable + int posn = 0; + for(ExprNodeDesc child: children.subList(start, children.size())) { + if (child instanceof ExprNodeConstantDesc) { + result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type); + } else { + // if we get some non-literals, we need to punt + return null; + } + } + return result; + } + + private void createLeaf(PredicateLeaf.Operator operator, + ExprNodeGenericFuncDesc expression, + int variable) { + String columnName = getColumnName(expression, variable); + if (columnName == null) { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + PredicateLeaf.Type type = getType(expression.getChildren().get(variable)); + if (type == null) { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + + // if the variable was on the right, we need to swap things around + boolean needSwap = false; + if (variable != 0) { + if (operator == PredicateLeaf.Operator.LESS_THAN) { + needSwap = true; + operator = PredicateLeaf.Operator.LESS_THAN_EQUALS; + } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) { + needSwap = true; + operator = PredicateLeaf.Operator.LESS_THAN; + } + } + if (needSwap) { + builder.startNot(); + } + + switch (operator) { + case IS_NULL: + builder.isNull(columnName, type); + break; + case EQUALS: + builder.equals(columnName, type, findLiteral(expression, type)); + break; + case NULL_SAFE_EQUALS: + builder.nullSafeEquals(columnName, type, findLiteral(expression, type)); + break; + case LESS_THAN: + builder.lessThan(columnName, type, findLiteral(expression, type)); + break; + case LESS_THAN_EQUALS: + builder.lessThanEquals(columnName, type, findLiteral(expression, type)); + break; + case IN: + builder.in(columnName, type, + getLiteralList(expression, type, variable + 1)); + break; + case BETWEEN: + builder.between(columnName, type, + getLiteral(expression, type, variable + 1), + getLiteral(expression, type, variable + 2)); + break; + } + + if (needSwap) { + builder.end(); + } + } + + /** + * Find the variable in the expression. + * @param expr the expression to look in + * @return the index of the variable or -1 if there is not exactly one + * variable. + */ + private int findVariable(ExprNodeDesc expr) { + int result = -1; + List children = expr.getChildren(); + for(int i = 0; i < children.size(); ++i) { + ExprNodeDesc child = children.get(i); + if (child instanceof ExprNodeColumnDesc) { + // if we already found a variable, this isn't a sarg + if (result != -1) { + return -1; + } else { + result = i; + } + } + } + return result; + } + + /** + * Create a leaf expression when we aren't sure where the variable is + * located. + * @param operator the operator type that was found + * @param expression the expression to check + */ + private void createLeaf(PredicateLeaf.Operator operator, + ExprNodeGenericFuncDesc expression) { + createLeaf(operator, expression, findVariable(expression)); + } + + private void addChildren(ExprNodeGenericFuncDesc node) { + for(ExprNodeDesc child: node.getChildren()) { + parse(child); + } + } + + /** + * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree. + * @param expression the Hive ExprNodeDesc + */ + private void parse(ExprNodeDesc expression) { + // Most of the stuff we can handle are generic function descriptions, so + // handle the special cases. + if (expression.getClass() != ExprNodeGenericFuncDesc.class) { + + // if it is a reference to a boolean column, covert it to a truth test. + if (expression instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression; + if (columnDesc.getTypeString().equals("boolean")) { + builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, + true); + return; + } + } + + // otherwise, we don't know what to do so make it a maybe + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + + // get the kind of expression + ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression; + Class op = expr.getGenericUDF().getClass(); + + // handle the logical operators + if (op == GenericUDFOPOr.class) { + builder.startOr(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPAnd.class) { + builder.startAnd(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPNot.class) { + builder.startNot(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPEqual.class) { + createLeaf(PredicateLeaf.Operator.EQUALS, expr); + } else if (op == GenericUDFOPNotEqual.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.EQUALS, expr); + builder.end(); + } else if (op == GenericUDFOPEqualNS.class) { + createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr); + } else if (op == GenericUDFOPGreaterThan.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr); + builder.end(); + } else if (op == GenericUDFOPEqualOrGreaterThan.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.LESS_THAN, expr); + builder.end(); + } else if (op == GenericUDFOPLessThan.class) { + createLeaf(PredicateLeaf.Operator.LESS_THAN, expr); + } else if (op == GenericUDFOPEqualOrLessThan.class) { + createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr); + } else if (op == GenericUDFIn.class) { + createLeaf(PredicateLeaf.Operator.IN, expr, 0); + } else if (op == GenericUDFBetween.class) { + createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1); + } else if (op == GenericUDFOPNull.class) { + createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); + } else if (op == GenericUDFOPNotNull.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); + builder.end(); + + // otherwise, we didn't understand it, so mark it maybe + } else { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + } + } + + + public static final String SARG_PUSHDOWN = "sarg.pushdown"; + + public static SearchArgument create(ExprNodeGenericFuncDesc expression) { + return new ConvertAstToSearchArg(expression).buildSearchArgument(); + } + + + public static SearchArgument create(String kryo) { + Input input = new Input(Base64.decodeBase64(kryo)); + return new Kryo().readObject(input, SearchArgumentImpl.class); + } + + public static SearchArgument createFromConf(Configuration conf) { + String sargString; + if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { + return create(Utilities.deserializeExpression(sargString)); + } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { + return create(sargString); + } + return null; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java index c75e820..6ad927d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.io.sarg; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; @@ -30,27 +33,7 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; * A factory for creating SearchArguments. */ public class SearchArgumentFactory { - public static final String SARG_PUSHDOWN = "sarg.pushdown"; - - public static SearchArgument create(ExprNodeGenericFuncDesc expression) { - return new SearchArgumentImpl(expression); - } - public static Builder newBuilder() { - return SearchArgumentImpl.newBuilder(); - } - - public static SearchArgument create(String kryo) { - return SearchArgumentImpl.fromKryo(kryo); - } - - public static SearchArgument createFromConf(Configuration conf) { - String sargString = null; - if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { - return create(Utilities.deserializeExpression(sargString)); - } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { - return create(sargString); - } - return null; + return new SearchArgumentImpl.BuilderImpl(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 46f1e4e..1582a75 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,47 +18,20 @@ package org.apache.hadoop.hive.ql.io.sarg; -import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Arrays; import java.util.Deque; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; /** @@ -74,6 +47,8 @@ final class SearchArgumentImpl implements SearchArgument { private final Object literal; private final List literalList; + // Used by kryo + @SuppressWarnings("unused") PredicateLeafImpl() { operator = null; type = null; @@ -91,7 +66,24 @@ final class SearchArgumentImpl implements SearchArgument { this.type = type; this.columnName = columnName; this.literal = literal; + if (literal != null) { + if (literal.getClass() != type.getValueClass()) { + throw new IllegalArgumentException("Wrong value class " + + literal.getClass().getName() + " for " + type + "." + operator + + " leaf"); + } + } this.literalList = literalList; + if (literalList != null) { + Class valueCls = type.getValueClass(); + for(Object lit: literalList) { + if (lit != null && lit.getClass() != valueCls) { + throw new IllegalArgumentException("Wrong value class item " + + lit.getClass().getName() + " for " + type + "." + operator + + " leaf"); + } + } + } } @Override @@ -138,7 +130,7 @@ final class SearchArgumentImpl implements SearchArgument { } else if (literalList != null) { for(Object lit: literalList) { buffer.append(' '); - buffer.append(lit.toString()); + buffer.append(lit == null ? "null" : lit.toString()); } } buffer.append(')'); @@ -146,13 +138,9 @@ final class SearchArgumentImpl implements SearchArgument { } private static boolean isEqual(Object left, Object right) { - if (left == right) { - return true; - } else if (left == null || right == null) { - return false; - } else { - return left.equals(right); - } + + return left == right || + (left != null && right != null && left.equals(right)); } @Override @@ -182,286 +170,315 @@ final class SearchArgumentImpl implements SearchArgument { } } - static class ExpressionBuilder { - // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe + + private final List leaves; + private final ExpressionTree expression; + + SearchArgumentImpl(ExpressionTree expression, List leaves) { + this.expression = expression; + this.leaves = leaves; + } + + // Used by kyro + @SuppressWarnings("unused") + SearchArgumentImpl() { + leaves = null; + expression = null; + } + + @Override + public List getLeaves() { + return leaves; + } + + @Override + public TruthValue evaluate(TruthValue[] leaves) { + return expression == null ? TruthValue.YES : expression.evaluate(leaves); + } + + @Override + public ExpressionTree getExpression() { + return expression; + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + for(int i=0; i < leaves.size(); ++i) { + buffer.append("leaf-"); + buffer.append(i); + buffer.append(" = "); + buffer.append(leaves.get(i).toString()); + buffer.append('\n'); + } + buffer.append("expr = "); + buffer.append(expression); + return buffer.toString(); + } + + public String toKryo() { + Output out = new Output(4 * 1024, 10 * 1024 * 1024); + new Kryo().writeObject(out, this); + out.close(); + return Base64.encodeBase64String(out.toBytes()); + } + + static class BuilderImpl implements Builder { + + // max threshold for CNF conversion. having >8 elements in andList will be + // converted to maybe private static final int CNF_COMBINATIONS_THRESHOLD = 256; - private final List leaves = new ArrayList(); - /** - * Get the type of the given expression node. - * @param expr the expression to get the type of - * @return int, string, or float or null if we don't know the type - */ - private static PredicateLeaf.Type getType(ExprNodeDesc expr) { - TypeInfo type = expr.getTypeInfo(); - if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) { - switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - return PredicateLeaf.Type.INTEGER; - case LONG: - return PredicateLeaf.Type.LONG; - case CHAR: - case VARCHAR: - case STRING: - return PredicateLeaf.Type.STRING; - case FLOAT: - case DOUBLE: - return PredicateLeaf.Type.FLOAT; - case DATE: - return PredicateLeaf.Type.DATE; - case TIMESTAMP: - return PredicateLeaf.Type.TIMESTAMP; - case DECIMAL: - return PredicateLeaf.Type.DECIMAL; - case BOOLEAN: - return PredicateLeaf.Type.BOOLEAN; - default: - } - } - return null; + private final Deque currentTree = + new ArrayDeque(); + private final Map leaves = + new HashMap(); + private final ExpressionTree root = + new ExpressionTree(ExpressionTree.Operator.AND); + { + currentTree.add(root); } - /** - * Get the column name referenced in the expression. It must be at the top - * level of this expression and there must be exactly one column. - * @param expr the expression to look in - * @param variable the slot the variable is expected in - * @return the column name or null if there isn't exactly one column - */ - private static String getColumnName(ExprNodeGenericFuncDesc expr, - int variable) { - List children = expr.getChildren(); - if (variable < 0 || variable >= children.size()) { - return null; + @Override + public Builder startOr() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder startAnd() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder startNot() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder end() { + ExpressionTree current = currentTree.removeFirst(); + if (current.getChildren().size() == 0) { + throw new IllegalArgumentException("Can't create expression " + root + + " with no children."); } - ExprNodeDesc child = children.get(variable); - if (child instanceof ExprNodeColumnDesc) { - return ((ExprNodeColumnDesc) child).getColumn(); + if (current.getOperator() == ExpressionTree.Operator.NOT && + current.getChildren().size() != 1) { + throw new IllegalArgumentException("Can't create not expression " + + current + " with more than 1 child."); } - return null; + return this; } - private static Object boxLiteral(ExprNodeConstantDesc lit) { - switch (getType(lit)) { - case INTEGER: - return ((Number) lit.getValue()).intValue(); - case LONG: - return ((Number) lit.getValue()).longValue(); - case STRING: - return StringUtils.stripEnd(lit.getValue().toString(), null); - case FLOAT: - return Double.parseDouble(lit.getValue().toString()); - case DATE: - case TIMESTAMP: - case DECIMAL: - case BOOLEAN: - return lit; - default: - throw new IllegalArgumentException("Unknown literal " + getType(lit)); + private int addLeaf(PredicateLeaf leaf) { + Integer result = leaves.get(leaf); + if (result == null) { + int id = leaves.size(); + leaves.put(leaf, id); + return id; + } else { + return result; } } - private static Object getLiteral(ExprNodeGenericFuncDesc expr) { - Object result = null; - List children = expr.getChildren(); - if (children.size() != 2) { - return null; - } - for(ExprNodeDesc child: children) { - if (child instanceof ExprNodeConstantDesc) { - if (result != null) { - return null; - } - result = boxLiteral((ExprNodeConstantDesc) child); - } + @Override + public Builder lessThan(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - private static List getLiteralList(ExprNodeGenericFuncDesc expr, - int start) { - List result = new ArrayList(); - List children = expr.getChildren(); - // ignore the first child, since it is the variable - for(ExprNodeDesc child: children.subList(start, children.size())) { - if (child instanceof ExprNodeConstantDesc) { - result.add(boxLiteral((ExprNodeConstantDesc) child)); - } else { - // if we get some non-literals, we need to punt - return null; - } + @Override + public Builder lessThanEquals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - private ExpressionTree createLeaf(PredicateLeaf.Operator operator, - ExprNodeGenericFuncDesc expression, - List leafCache, - int variable) { - String columnName = getColumnName(expression, variable); - if (columnName == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - PredicateLeaf.Type type = getType(expression.getChildren().get(variable)); - if (type == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); + @Override + public Builder equals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } + return this; + } - Object literal = null; - List literalList = null; - switch (operator) { - case IS_NULL: - break; - case IN: - case BETWEEN: - literalList = getLiteralList(expression, variable + 1); - if (literalList == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - break; - default: - literal = getLiteral(expression); - if (literal == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - break; + @Override + public Builder nullSafeEquals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - // if the variable was on the right, we need to swap things around - boolean needSwap = false; - if (variable != 0) { - if (operator == PredicateLeaf.Operator.LESS_THAN) { - needSwap = true; - operator = PredicateLeaf.Operator.LESS_THAN_EQUALS; - } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) { - needSwap = true; - operator = PredicateLeaf.Operator.LESS_THAN; + return this; + } + + @Override + public Builder in(String column, PredicateLeaf.Type type, + Object... literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + if (literal.length == 0) { + throw new IllegalArgumentException("Can't create in expression with " + + "no arguments"); } + List argList = new ArrayList(); + argList.addAll(Arrays.asList(literal)); + + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.IN, + type, column, null, argList); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - leafCache.add(new PredicateLeafImpl(operator, type, columnName, - literal, literalList)); - ExpressionTree result = new ExpressionTree(leafCache.size() - 1); - if (needSwap) { - result = negate(result); - } - return result; + return this; } - /** - * Find the variable in the expression. - * @param expr the expression to look in - * @return the index of the variable or -1 if there is not exactly one - * variable. - */ - private int findVariable(ExprNodeDesc expr) { - int result = -1; - List children = expr.getChildren(); - for(int i = 0; i < children.size(); ++i) { - ExprNodeDesc child = children.get(i); - if (child instanceof ExprNodeColumnDesc) { - // if we already found a variable, this isn't a sarg - if (result != -1) { - return -1; - } else { - result = i; - } - } + @Override + public Builder isNull(String column, PredicateLeaf.Type type) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, + type, column, null, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - /** - * Create a leaf expression when we aren't sure where the variable is - * located. - * @param operator the operator type that was found - * @param expression the expression to check - * @param leafCache the list of leaves - * @return if the expression is a sarg, return it, otherwise null - */ - private ExpressionTree createLeaf(PredicateLeaf.Operator operator, - ExprNodeGenericFuncDesc expression, - List leafCache) { - return createLeaf(operator, expression, leafCache, - findVariable(expression)); + @Override + public Builder between(String column, PredicateLeaf.Type type, Object lower, + Object upper) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || lower == null || upper == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + List argList = new ArrayList(); + argList.add(lower); + argList.add(upper); + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, + type, column, null, argList); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); + } + return this; } - private ExpressionTree negate(ExpressionTree expr) { - ExpressionTree result = new ExpressionTree(ExpressionTree.Operator.NOT); - result.getChildren().add(expr); - return result; + @Override + public Builder literal(TruthValue truth) { + ExpressionTree parent = currentTree.getFirst(); + parent.getChildren().add(new ExpressionTree(truth)); + return this; } - private void addChildren(ExpressionTree result, - ExprNodeGenericFuncDesc node, - List leafCache) { - for(ExprNodeDesc child: node.getChildren()) { - result.getChildren().add(parse(child, leafCache)); + /** + * Recursively explore the tree to find the leaves that are still reachable + * after optimizations. + * @param tree the node to check next + * @param next the next available leaf id + * @param leafReorder + * @return the next available leaf id + */ + static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) { + if (tree.getOperator() == ExpressionTree.Operator.LEAF) { + int oldLeaf = tree.getLeaf(); + if (leafReorder[oldLeaf] == -1) { + leafReorder[oldLeaf] = next++; + } + } else if (tree.getChildren() != null){ + for(ExpressionTree child: tree.getChildren()) { + next = compactLeaves(child, next, leafReorder); + } } + return next; } /** - * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree. - * @param expression the Hive ExprNodeDesc - * @return the non-normalized ExpressionTree + * Rewrite expression tree to update the leaves. + * @param root the root of the tree to fix + * @param leafReorder a map from old leaf ids to new leaf ids + * @return the fixed root */ - private ExpressionTree parse(ExprNodeDesc expression, - List leafCache) { - // if we don't know the expression, just assume maybe - if (expression.getClass() != ExprNodeGenericFuncDesc.class) { - return new ExpressionTree(TruthValue.YES_NO_NULL); + static ExpressionTree rewriteLeaves(ExpressionTree root, + int[] leafReorder) { + if (root.getOperator() == ExpressionTree.Operator.LEAF) { + return new ExpressionTree(leafReorder[root.getLeaf()]); + } else if (root.getChildren() != null){ + List children = root.getChildren(); + for(int i=0; i < children.size(); ++i) { + children.set(i, rewriteLeaves(children.get(i), leafReorder)); + } } - // get the kind of expression - ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression; - Class op = expr.getGenericUDF().getClass(); - ExpressionTree result; - - // handle the logical operators - if (op == GenericUDFOPOr.class) { - result = new ExpressionTree(ExpressionTree.Operator.OR); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPAnd.class) { - result = new ExpressionTree(ExpressionTree.Operator.AND); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPNot.class) { - result = new ExpressionTree(ExpressionTree.Operator.NOT); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPEqual.class) { - result = createLeaf(PredicateLeaf.Operator.EQUALS, expr, leafCache); - } else if (op == GenericUDFOPNotEqual.class) { - result = negate(createLeaf(PredicateLeaf.Operator.EQUALS, expr, - leafCache)); - } else if (op == GenericUDFOPEqualNS.class) { - result = createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr, - leafCache); - } else if (op == GenericUDFOPGreaterThan.class) { - result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, - expr, leafCache)); - } else if (op == GenericUDFOPEqualOrGreaterThan.class) { - result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, - leafCache)); - } else if (op == GenericUDFOPLessThan.class) { - result = createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, leafCache); - } else if (op == GenericUDFOPEqualOrLessThan.class) { - result = createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr, - leafCache); - } else if (op == GenericUDFIn.class) { - result = createLeaf(PredicateLeaf.Operator.IN, expr, leafCache, 0); - } else if (op == GenericUDFBetween.class) { - result = createLeaf(PredicateLeaf.Operator.BETWEEN, expr, leafCache, - 1); - } else if (op == GenericUDFOPNull.class) { - result = createLeaf(PredicateLeaf.Operator.IS_NULL, expr, leafCache, - 0); - } else if (op == GenericUDFOPNotNull.class) { - result = negate(createLeaf(PredicateLeaf.Operator.IS_NULL, expr, - leafCache, 0)); + return root; + } - // otherwise, we didn't understand it, so mark it maybe - } else { - result = new ExpressionTree(TruthValue.YES_NO_NULL); + @Override + public SearchArgument build() { + if (currentTree.size() != 1) { + throw new IllegalArgumentException("Failed to end " + + currentTree.size() + " operations."); } - return result; + ExpressionTree optimized = pushDownNot(root); + optimized = foldMaybe(optimized); + optimized = flatten(optimized); + optimized = convertToCNF(optimized); + optimized = flatten(optimized); + int leafReorder[] = new int[leaves.size()]; + Arrays.fill(leafReorder, -1); + int newLeafCount = compactLeaves(optimized, 0, leafReorder); + optimized = rewriteLeaves(optimized, leafReorder); + ArrayList leafList = new ArrayList<>(newLeafCount); + // expand list to correct size + for(int i=0; i < newLeafCount; ++i) { + leafList.add(null); + } + // build the new list + for(Map.Entry elem: leaves.entrySet()) { + int newLoc = leafReorder[elem.getValue()]; + if (newLoc != -1) { + leafList.set(newLoc, elem.getKey()); + } + } + return new SearchArgumentImpl(optimized, leafList); } /** @@ -528,7 +545,7 @@ final class SearchArgumentImpl implements SearchArgument { return child; default: throw new IllegalStateException("Got a maybe as child of " + - expr); + expr); } } else { expr.getChildren().set(i, child); @@ -542,6 +559,45 @@ final class SearchArgumentImpl implements SearchArgument { } /** + * Converts multi-level ands and ors into single level ones. + * @param root the expression to flatten + * @return the flattened expression, which will always be root with + * potentially modified children. + */ + static ExpressionTree flatten(ExpressionTree root) { + if (root.getChildren() != null) { + // iterate through the index, so that if we add more children, + // they don't get re-visited + for(int i=0; i < root.getChildren().size(); ++i) { + ExpressionTree child = flatten(root.getChildren().get(i)); + // do we need to flatten? + if (child.getOperator() == root.getOperator() && + child.getOperator() != ExpressionTree.Operator.NOT) { + boolean first = true; + for(ExpressionTree grandkid: child.getChildren()) { + // for the first grandkid replace the original parent + if (first) { + first = false; + root.getChildren().set(i, grandkid); + } else { + root.getChildren().add(++i, grandkid); + } + } + } else { + root.getChildren().set(i, child); + } + } + // if we have a singleton AND or OR, just return the child + if ((root.getOperator() == ExpressionTree.Operator.OR || + root.getOperator() == ExpressionTree.Operator.AND) && + root.getChildren().size() == 1) { + return root.getChildren().get(0); + } + } + return root; + } + + /** * Generate all combinations of items on the andList. For each item on the * andList, it generates all combinations of one child from each and * expression. Thus, (and a b) (and c d) will be expanded to: (or a c) @@ -554,7 +610,7 @@ final class SearchArgumentImpl implements SearchArgument { private static void generateAllCombinations(List result, List andList, List nonAndList - ) { + ) { List kids = andList.get(0).getChildren(); if (result.isEmpty()) { for(ExpressionTree kid: kids) { @@ -637,391 +693,5 @@ final class SearchArgumentImpl implements SearchArgument { return true; } - /** - * Converts multi-level ands and ors into single level ones. - * @param root the expression to flatten - * @return the flattened expression, which will always be root with - * potentially modified children. - */ - static ExpressionTree flatten(ExpressionTree root) { - if (root.getChildren() != null) { - // iterate through the index, so that if we add more children, - // they don't get re-visited - for(int i=0; i < root.getChildren().size(); ++i) { - ExpressionTree child = flatten(root.getChildren().get(i)); - // do we need to flatten? - if (child.getOperator() == root.getOperator() && - child.getOperator() != ExpressionTree.Operator.NOT) { - boolean first = true; - for(ExpressionTree grandkid: child.getChildren()) { - // for the first grandkid replace the original parent - if (first) { - first = false; - root.getChildren().set(i, grandkid); - } else { - root.getChildren().add(++i, grandkid); - } - } - } else { - root.getChildren().set(i, child); - } - } - // if we have a singleton AND or OR, just return the child - if ((root.getOperator() == ExpressionTree.Operator.OR || - root.getOperator() == ExpressionTree.Operator.AND) && - root.getChildren().size() == 1) { - return root.getChildren().get(0); - } - } - return root; - } - - /** - * Iterates through the expression, finding all of the leaves. It creates - * the leaves list with each unique leaf that is found in the expression. - * The expression is updated with the new leaf ids for each leaf. - * @param expr the expression to find the leaves in - * @param leafCache the list of all of the leaves - * @param lookup a map that is used to uniquify the leaves - * @return The potentially modified expression - */ - private ExpressionTree buildLeafList(ExpressionTree expr, - List leafCache, - Map lookup) { - if (expr.getChildren() != null) { - for(int i=0; i < expr.getChildren().size(); ++i) { - expr.getChildren().set(i, buildLeafList(expr.getChildren().get(i), - leafCache, lookup)); - } - } else if (expr.getOperator() == ExpressionTree.Operator.LEAF) { - PredicateLeaf leaf = leafCache.get(expr.getLeaf()); - ExpressionTree val = lookup.get(leaf); - if (val == null) { - val = new ExpressionTree(leaves.size()); - lookup.put(leaf, val); - leaves.add(leaf); - } - return val; - } - return expr; - } - - /** - * Builds the expression and leaf list from the original predicate. - * @param expression the expression to translate - * @return The normalized expression. - */ - ExpressionTree expression(ExprNodeGenericFuncDesc expression) { - List leafCache = new ArrayList(); - ExpressionTree expr = parse(expression, leafCache); - return expression(expr, leafCache); - } - - /** - * Builds the expression and optimized leaf list from a non-normalized - * expression. Sets the leaves field with the unique leaves. - * @param expr non-normalized expression - * @param leaves non-unique leaves - * @return the normalized expression - */ - ExpressionTree expression(ExpressionTree expr, - List leaves) { - expr = pushDownNot(expr); - expr = foldMaybe(expr); - expr = flatten(expr); - expr = convertToCNF(expr); - expr = flatten(expr); - expr = buildLeafList(expr, leaves, - new HashMap()); - return expr; - } - - List getLeaves() { - return leaves; - } - } - - private final List leaves; - private final ExpressionTree expression; - - SearchArgumentImpl(ExprNodeGenericFuncDesc expr) { - if (expr == null) { - leaves = new ArrayList(); - expression = null; - } else { - ExpressionBuilder builder = new ExpressionBuilder(); - expression = builder.expression(expr); - leaves = builder.getLeaves(); - } - } - - SearchArgumentImpl() { - leaves = null; - expression = null; - } - - SearchArgumentImpl(ExpressionTree expression, List leaves) { - this.expression = expression; - this.leaves = leaves; - } - - @Override - public List getLeaves() { - return leaves; - } - - @Override - public TruthValue evaluate(TruthValue[] leaves) { - return expression == null ? TruthValue.YES : expression.evaluate(leaves); - } - - @Override - public ExpressionTree getExpression() { - return expression; - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - for(int i=0; i < leaves.size(); ++i) { - buffer.append("leaf-"); - buffer.append(i); - buffer.append(" = "); - buffer.append(leaves.get(i).toString()); - buffer.append('\n'); - } - buffer.append("expr = "); - buffer.append(expression); - return buffer.toString(); - } - - public String toKryo() { - Output out = new Output(4 * 1024, 10 * 1024 * 1024); - new Kryo().writeObject(out, this); - out.close(); - return Base64.encodeBase64String(out.toBytes()); - } - - static SearchArgument fromKryo(String value) { - Input input = new Input(Base64.decodeBase64(value)); - return new Kryo().readObject(input, SearchArgumentImpl.class); - } - - private static class BuilderImpl implements Builder { - private final Deque currentTree = - new ArrayDeque(); - private final List leaves = new ArrayList(); - private ExpressionTree root = null; - - @Override - public Builder startOr() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder startAnd() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder startNot() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder end() { - root = currentTree.removeFirst(); - if (root.getChildren().size() == 0) { - throw new IllegalArgumentException("Can't create expression " + root + - " with no children."); - } - if (root.getOperator() == ExpressionTree.Operator.NOT && - root.getChildren().size() != 1) { - throw new IllegalArgumentException("Can't create not expression " + - root + " with more than 1 child."); - } - return this; - } - - private static Object boxLiteral(Object literal) { - if (literal instanceof String || - literal instanceof Long || - literal instanceof Double || - literal instanceof DateWritable || - literal instanceof Timestamp || - literal instanceof HiveDecimal || - literal instanceof BigDecimal || - literal instanceof Boolean) { - return literal; - } else if (literal instanceof HiveChar || - literal instanceof HiveVarchar) { - return StringUtils.stripEnd(literal.toString(), null); - } else if (literal instanceof Byte || - literal instanceof Short || - literal instanceof Integer) { - return ((Number) literal).longValue(); - } else if (literal instanceof Float) { - // to avoid change in precision when upcasting float to double - // we convert the literal to string and parse it as double. (HIVE-8460) - return Double.parseDouble(literal.toString()); - } else { - throw new IllegalArgumentException("Unknown type for literal " + - literal); - } - } - - private static PredicateLeaf.Type getType(Object literal) { - if (literal instanceof Byte || - literal instanceof Short || - literal instanceof Integer) { - return PredicateLeaf.Type.INTEGER; - } else if(literal instanceof Long){ - return PredicateLeaf.Type.LONG; - }else if (literal instanceof HiveChar || - literal instanceof HiveVarchar || - literal instanceof String) { - return PredicateLeaf.Type.STRING; - } else if (literal instanceof Float || - literal instanceof Double) { - return PredicateLeaf.Type.FLOAT; - } else if (literal instanceof DateWritable) { - return PredicateLeaf.Type.DATE; - } else if (literal instanceof Timestamp) { - return PredicateLeaf.Type.TIMESTAMP; - }else if (literal instanceof HiveDecimal || - literal instanceof BigDecimal) { - return PredicateLeaf.Type.DECIMAL; - } else if (literal instanceof Boolean) { - return PredicateLeaf.Type.BOOLEAN; - } - throw new IllegalArgumentException("Unknown type for literal " + literal); - } - - @Override - public Builder lessThan(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder lessThanEquals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder equals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder nullSafeEquals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder in(String column, Object... literal) { - ExpressionTree parent = currentTree.getFirst(); - if (literal.length == 0) { - throw new IllegalArgumentException("Can't create in expression with " - + "no arguments"); - } - List argList = new ArrayList(); - for(Object lit: literal){ - argList.add(boxLiteral(lit)); - } - - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.IN, - getType(argList.get(0)), column, null, argList); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder isNull(String column) { - ExpressionTree parent = currentTree.getFirst(); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, - PredicateLeaf.Type.STRING, column, null, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder between(String column, Object lower, Object upper) { - ExpressionTree parent = currentTree.getFirst(); - List argList = new ArrayList(); - argList.add(boxLiteral(lower)); - argList.add(boxLiteral(upper)); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, - getType(argList.get(0)), column, null, argList); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public SearchArgument build() { - if (currentTree.size() != 0) { - throw new IllegalArgumentException("Failed to end " + - currentTree.size() + " operations."); - } - ExpressionBuilder internal = new ExpressionBuilder(); - ExpressionTree normalized = internal.expression(root, leaves); - return new SearchArgumentImpl(normalized, internal.getLeaves()); - } - } - - public static Builder newBuilder() { - return new BuilderImpl(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index e96ab2a..e40e1d2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; @@ -1746,8 +1747,8 @@ public class TestInputOutputFormat { types.add(builder.build()); types.add(builder.build()); SearchArgument isNull = SearchArgumentFactory.newBuilder() - .startAnd().isNull("cost").end().build(); - conf.set(SearchArgumentFactory.SARG_PUSHDOWN, isNull.toKryo()); + .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build(); + conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, isNull.toKryo()); conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost"); options.include(new boolean[]{true, true, false, true, false}); @@ -1791,7 +1792,7 @@ public class TestInputOutputFormat { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("z", new Integer(0)) + .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0)) .end() .build(); conf.set("sarg.pushdown", sarg.toKryo()); @@ -1833,7 +1834,7 @@ public class TestInputOutputFormat { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("z", new String("foo")) + .lessThan("z", PredicateLeaf.Type.STRING, new String("foo")) .end() .build(); conf.set("sarg.pushdown", sarg.toKryo()); http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 255565e..4480d22 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -1922,9 +1923,9 @@ public class TestOrcFile { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() .startNot() - .lessThan("int1", 300000) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000) .end() - .lessThan("int1", 600000) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000) .end() .build(); RecordReader rows = reader.rowsOptions(new Reader.Options() @@ -1945,7 +1946,7 @@ public class TestOrcFile { // look through the file with no rows selected sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("int1", 0) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 0) .end() .build(); rows = reader.rowsOptions(new Reader.Options() @@ -1958,9 +1959,9 @@ public class TestOrcFile { // select first 100 and last 100 rows sarg = SearchArgumentFactory.newBuilder() .startOr() - .lessThan("int1", 300 * 100) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100) .startNot() - .lessThan("int1", 300 * 3400) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400) .end() .end() .build();