hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1640409 - in /hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ ql/src/test/...
Date Tue, 18 Nov 2014 18:53:54 GMT
Author: brock
Date: Tue Nov 18 18:53:53 2014
New Revision: 1640409

URL: http://svn.apache.org/r1640409
Log:
HIVE-8122 - Make use of SearchArgument classes for Parquet SERDE (Ferdinand Xu via Brock)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
Modified:
    hive/trunk/pom.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
    hive/trunk/serde/pom.xml
    hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java

Modified: hive/trunk/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/pom.xml?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/pom.xml (original)
+++ hive/trunk/pom.xml Tue Nov 18 18:53:53 2014
@@ -149,7 +149,7 @@
         requires netty < 3.6.0 we force hadoops version
       -->
     <netty.version>3.4.0.Final</netty.version>
-    <parquet.version>1.5.0</parquet.version>
+    <parquet.version>1.6.0rc3</parquet.version>
     <pig.version>0.12.0</pig.version>
     <protobuf.version>2.5.0</protobuf.version>
     <stax.version>1.0.1</stax.version>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Nov 18 18:53:53 2014
@@ -2388,7 +2388,7 @@ class RecordReaderImpl implements Record
       // the stats object is converted to text and comparison is performed.
       // When STRINGs are converted to other base types, NumberFormat exception
       // can occur in which case TruthValue.YES_NO_NULL value is returned
-      Object baseObj = predicate.getLiteral();
+      Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC);
       Object minValue = getConvertedStatsObj(min, baseObj);
       Object maxValue = getConvertedStatsObj(max, baseObj);
       Object predObj = getBaseObjectForComparison(baseObj, minValue);
@@ -2432,7 +2432,7 @@ class RecordReaderImpl implements Record
         if (minValue.equals(maxValue)) {
           // for a single value, look through to see if that value is in the
           // set
-          for (Object arg : predicate.getLiteralList()) {
+          for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
             predObj = getBaseObjectForComparison(arg, minValue);
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN) {
@@ -2442,7 +2442,7 @@ class RecordReaderImpl implements Record
           return TruthValue.NO_NULL;
         } else {
           // are all of the values outside of the range?
-          for (Object arg : predicate.getLiteralList()) {
+          for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
             predObj = getBaseObjectForComparison(arg, minValue);
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN || loc == Location.MIDDLE ||
@@ -2453,7 +2453,7 @@ class RecordReaderImpl implements Record
           return TruthValue.NO_NULL;
         }
       case BETWEEN:
-        List<Object> args = predicate.getLiteralList();
+        List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
         Object predObj1 = getBaseObjectForComparison(args.get(0), minValue);
 
         loc = compareToRange((Comparable) predObj1, minValue, maxValue);

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java?rev=1640409&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java Tue Nov 18 18:53:53 2014
@@ -0,0 +1,80 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
+
+import static parquet.filter2.predicate.FilterApi.not;
+import static parquet.filter2.predicate.FilterApi.or;
+
+/**
+ * The base class for building Parquet-supported filter predicates for primitive types.
+ */
+public abstract class FilterPredicateLeafBuilder {
+
+  /**
+   * Build filter predicate with multiple constants
+   *
+   * @param op         IN or BETWEEN
+   * @param literals
+   * @param columnName
+   * @return
+   */
+  public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals,
+                                        String columnName) throws Exception {
+    FilterPredicate result = null;
+    switch (op) {
+      case IN:
+        for (Object literal : literals) {
+          if (result == null) {
+            result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName);
+          } else {
+            result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal,
+                columnName));
+          }
+        }
+        return result;
+      case BETWEEN:
+        if (literals.size() != 2) {
+          throw new RuntimeException(
+            "Not able to build 'between' operation filter with " + literals +
+              " which needs two literals");
+        }
+        Object min = literals.get(0);
+        Object max = literals.get(1);
+        FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+            min, columnName));
+        FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN, max, columnName);
+        result = FilterApi.and(gt, lt);
+        return result;
+      default:
+        throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+    }
+  }
+
+  /**
+   * Build predicate with a single constant
+   *
+   * @param op         EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL
+   * @param constant
+   * @param columnName
+   * @return null or a FilterPredicate, null means no filter will be executed
+   */
+  public abstract FilterPredicate buildPredict(PredicateLeaf.Operator op, Object constant,
+                                               String columnName) throws Exception;
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java?rev=1640409&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java Tue Nov 18 18:53:53 2014
@@ -0,0 +1,169 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
+
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
+import parquet.io.api.Binary;
+
+import static parquet.filter2.predicate.FilterApi.eq;
+import static parquet.filter2.predicate.FilterApi.lt;
+import static parquet.filter2.predicate.FilterApi.ltEq;
+import static parquet.filter2.predicate.FilterApi.binaryColumn;
+import static parquet.filter2.predicate.FilterApi.booleanColumn;
+import static parquet.filter2.predicate.FilterApi.doubleColumn;
+import static parquet.filter2.predicate.FilterApi.intColumn;
+
+public class LeafFilterFactory {
+  private static final Log LOG = LogFactory.getLog(LeafFilterFactory.class);
+
+  class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    /**
+     * @param op         consists of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL
+     * @param literal
+     * @param columnName
+     * @return
+     */
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object literal,
+                                        String columnName) {
+      switch (op) {
+        case LESS_THAN:
+          return lt(intColumn(columnName), ((Number) literal).intValue());
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return eq(intColumn(columnName),
+            (literal == null) ? null : ((Number) literal).intValue());
+        case LESS_THAN_EQUALS:
+          return ltEq(intColumn(columnName), ((Number) literal).intValue());
+        default:
+          throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
+  class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) {
+      switch (op) {
+        case LESS_THAN:
+          return lt(FilterApi.longColumn(columnName), ((Number) constant).longValue());
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return eq(FilterApi.longColumn(columnName),
+            (constant == null) ? null : ((Number) constant).longValue());
+        case LESS_THAN_EQUALS:
+          return ltEq(FilterApi.longColumn(columnName),
+            ((Number) constant).longValue());
+        default:
+          throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
+  class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) {
+      switch (op) {
+        case LESS_THAN:
+          return lt(doubleColumn(columnName), ((Number) constant).doubleValue());
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return eq(doubleColumn(columnName),
+            (constant == null) ? null : ((Number) constant).doubleValue());
+        case LESS_THAN_EQUALS:
+          return ltEq(FilterApi.doubleColumn(columnName),
+            ((Number) constant).doubleValue());
+        default:
+          throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
+  class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) throws Exception{
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return eq(booleanColumn(columnName),
+            (constant == null) ? null : ((Boolean) constant).booleanValue());
+        default:
+          throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
+  class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) throws Exception{
+      switch (op) {
+        case LESS_THAN:
+          return lt(binaryColumn(columnName), Binary.fromString((String) constant));
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return eq(binaryColumn(columnName),
+            (constant == null) ? null : Binary.fromString((String) constant));
+        case LESS_THAN_EQUALS:
+          return ltEq(binaryColumn(columnName), Binary.fromString((String) constant));
+        default:
+          // should never be executed
+          throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
+  /**
+   * Get a leaf filter builder by PredicateLeaf.Type; date, decimal and timestamp are not
+   * supported yet.
+   * @param type the PredicateLeaf.Type of the predicate leaf
+   * @return a FilterPredicateLeafBuilder for the type, or null if unsupported
+   */
+  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
+    switch (type){
+      case INTEGER:
+        return new IntFilterPredicateLeafBuilder();
+      case LONG:
+        return new LongFilterPredicateLeafBuilder();
+      case FLOAT:   // float and double
+        return new DoubleFilterPredicateLeafBuilder();
+      case STRING:  // string, char, varchar
+        return new BinaryFilterPredicateLeafBuilder();
+      case BOOLEAN:
+        return new BooleanFilterPredicateLeafBuilder();
+      case DATE:
+      case DECIMAL:
+      case TIMESTAMP:
+      default:
+        LOG.debug("Conversion to Parquet FilterPredicate not supported for " + type);
+        return null;
+    }
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java Tue Nov 18 18:53:53 2014
@@ -20,8 +20,12 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.FileSplit;
@@ -32,6 +36,7 @@ import org.apache.hadoop.mapred.Reporter
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 
+import parquet.filter2.predicate.FilterPredicate;
 import parquet.hadoop.ParquetFileReader;
 import parquet.hadoop.ParquetInputFormat;
 import parquet.hadoop.ParquetInputSplit;
@@ -83,6 +88,8 @@ public class ParquetRecordReaderWrapper 
       taskAttemptID = new TaskAttemptID();
     }
 
+    setFilter(oldJobConf);
+
     // create a TaskInputOutputContext
     final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID);
 
@@ -110,6 +117,27 @@ public class ParquetRecordReaderWrapper 
     }
   }
 
+  public void setFilter(final JobConf conf) {
+    String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+    String columnNamesString =
+      conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+    if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() ||
+      columnNamesString.isEmpty()) {
+      return;
+    }
+
+    FilterPredicate p =
+      SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown))
+        .toFilterPredicate();
+    if (p != null) {
+      LOG.debug("Predicate filter for parquet is " + p.toString());
+      ParquetInputFormat.setFilterPredicate(conf, p);
+    } else {
+      LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR +
+        " with the value of " + serializedPushdown);
+    }
+  }
+
   @Override
   public void close() throws IOException {
     if (realReader != null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Tue Nov 18 18:53:53 2014
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
 import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayDeque;
@@ -30,9 +34,13 @@ import java.util.Map;
 
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
+import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -56,14 +64,14 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Input;
-import com.esotericsoftware.kryo.io.Output;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
 
 /**
  * The implementation of SearchArguments.
  */
 final class SearchArgumentImpl implements SearchArgument {
+  public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class);
 
   static final class PredicateLeafImpl implements PredicateLeaf {
     private final Operator operator;
@@ -98,7 +106,7 @@ final class SearchArgumentImpl implement
     }
 
     @Override
-    public Type getType() {
+    public Type getType(){
       return type;
     }
 
@@ -108,18 +116,55 @@ final class SearchArgumentImpl implement
     }
 
     @Override
-    public Object getLiteral() {
+    public Object getLiteral(FileFormat format) {
       // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
       // (https://github.com/EsotericSoftware/kryo/issues/88)
       // When we see a Date, convert back into Timestamp
       if (literal instanceof java.util.Date) {
-        return new Timestamp(((java.util.Date)literal).getTime());
+        return new Timestamp(((java.util.Date) literal).getTime());
+      }
+
+      switch (format) {
+        case ORC:
+          // adapt base type to what orc needs
+          if (literal instanceof Integer) {
+            return Long.valueOf(literal.toString());
+          }
+          return literal;
+        case PARQUET:
+          return literal;
+        default:
+          throw new RuntimeException(
+            "File format " + format + "is not support to build search arguments");
       }
-      return literal;
     }
 
     @Override
-    public List<Object> getLiteralList() {
+    public List<Object> getLiteralList(FileFormat format) {
+      switch (format) {
+        case ORC:
+          return getOrcLiteralList();
+        case PARQUET:
+          return getParquetLiteralList();
+        default:
+          throw new RuntimeException("File format is not support to build search arguments");
+      }
+    }
+
+    private List<Object> getOrcLiteralList() {
+      // no need to cast
+      if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof
+          Integer)) {
+        return literalList;
+      }
+      List<Object> result = new ArrayList<Object>();
+      for (Object o : literalList) {
+        result.add(Long.valueOf(o.toString()));
+      }
+      return result;
+    }
+
+    private List<Object> getParquetLiteralList() {
       return literalList;
     }
 
@@ -254,6 +299,76 @@ final class SearchArgumentImpl implement
       }
     }
 
+    FilterPredicate translate(List<PredicateLeaf> leafs){
+      FilterPredicate p = null;
+      switch (operator) {
+        case OR:
+          for(ExpressionTree child: children) {
+            if (p == null) {
+              p = child.translate(leafs);
+            } else {
+              FilterPredicate right = child.translate(leafs);
+              // constant means no filter, ignore it when it is null
+              if(right != null){
+                p = FilterApi.or(p, right);
+              }
+            }
+          }
+          return p;
+        case AND:
+          for(ExpressionTree child: children) {
+            if (p == null) {
+              p = child.translate(leafs);
+            } else {
+              FilterPredicate right = child.translate(leafs);
+              // constant means no filter, ignore it when it is null
+              if(right != null){
+                p = FilterApi.and(p, right);
+              }
+            }
+          }
+          return p;
+        case NOT:
+          FilterPredicate op = children.get(0).translate(leafs);
+          if (op != null) {
+            return FilterApi.not(op);
+          } else {
+            return null;
+          }
+        case LEAF:
+          return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf));
+        case CONSTANT:
+          return null;// no filter will be executed for constant
+        default:
+          throw new IllegalStateException("Unknown operator: " + operator);
+      }
+    }
+
+    private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) {
+      LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
+      FilterPredicateLeafBuilder builder;
+      try {
+        builder = leafFilterFactory
+          .getLeafFilterBuilderByType(leaf.getType());
+        if (builder == null) return null;
+        if (isMultiLiteralsOperator(leaf.getOperator())) {
+          return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList(
+            PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName());
+        } else {
+          return builder
+            .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET),
+              leaf.getColumnName());
+        }
+      } catch (Exception e) {
+        LOG.error("fail to build predicate filter leaf with errors" + e, e);
+        return null;
+      }
+    }
+
+    private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
+      return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN);
+    }
+
     @Override
     public String toString() {
       StringBuilder buffer = new StringBuilder();
@@ -314,8 +429,9 @@ final class SearchArgumentImpl implement
           case BYTE:
           case SHORT:
           case INT:
-          case LONG:
             return PredicateLeaf.Type.INTEGER;
+          case LONG:
+            return PredicateLeaf.Type.LONG;
           case CHAR:
           case VARCHAR:
           case STRING:
@@ -360,6 +476,8 @@ final class SearchArgumentImpl implement
     private static Object boxLiteral(ExprNodeConstantDesc lit) {
       switch (getType(lit)) {
         case INTEGER:
+          return ((Number) lit.getValue()).intValue();
+        case LONG:
           return ((Number) lit.getValue()).longValue();
         case STRING:
           return StringUtils.stripEnd(lit.getValue().toString(), null);
@@ -420,6 +538,7 @@ final class SearchArgumentImpl implement
       if (type == null) {
         return new ExpressionTree(TruthValue.YES_NO_NULL);
       }
+
       Object literal = null;
       List<Object> literalList = null;
       switch (operator) {
@@ -903,6 +1022,11 @@ final class SearchArgumentImpl implement
     return new Kryo().readObject(input, SearchArgumentImpl.class);
   }
 
+  @Override
+  public FilterPredicate toFilterPredicate() {
+    return expression.translate(leaves);
+  }
+
   private static class BuilderImpl implements Builder {
     private final Deque<ExpressionTree> currentTree =
         new ArrayDeque<ExpressionTree>();
@@ -987,10 +1111,11 @@ final class SearchArgumentImpl implement
     private static PredicateLeaf.Type getType(Object literal) {
       if (literal instanceof Byte ||
           literal instanceof Short ||
-          literal instanceof Integer ||
-          literal instanceof Long) {
+          literal instanceof Integer) {
         return PredicateLeaf.Type.INTEGER;
-      } else if (literal instanceof HiveChar ||
+      } else if(literal instanceof Long){
+        return PredicateLeaf.Type.LONG;
+      }else if (literal instanceof HiveChar ||
           literal instanceof HiveVarchar ||
           literal instanceof String) {
         return PredicateLeaf.Type.STRING;
@@ -1005,7 +1130,7 @@ final class SearchArgumentImpl implement
           literal instanceof BigDecimal) {
         return PredicateLeaf.Type.DECIMAL;
       } else if (literal instanceof Boolean) {
-    	return PredicateLeaf.Type.BOOLEAN;
+        return PredicateLeaf.Type.BOOLEAN;
       }
       throw new IllegalArgumentException("Unknown type for literal " + literal);
     }
@@ -1069,6 +1194,7 @@ final class SearchArgumentImpl implement
       for(Object lit: literal){
         argList.add(boxLiteral(lit));
       }
+
       PredicateLeaf leaf =
           new PredicateLeafImpl(PredicateLeaf.Operator.IN,
               getType(argList.get(0)), column, null, argList);

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java Tue Nov 18 18:53:53 2014
@@ -22,14 +22,13 @@ import com.google.common.collect.Sets;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.junit.Test;
+import org.junit.*;
+import parquet.filter2.predicate.FilterPredicate;
 
 import java.beans.XMLDecoder;
 import java.io.ByteArrayInputStream;
@@ -39,6 +38,7 @@ import java.util.List;
 import java.util.Set;
 
 import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
 import static junit.framework.Assert.assertTrue;
 
 /**
@@ -47,7 +47,7 @@ import static junit.framework.Assert.ass
  * to true and using a custom record reader that prints out the value of
  * hive.io.filter.expr.serialized in createRecordReader. This should be
  * replaced by generating the AST using the API and passing that in.
- *
+ * <p/>
  * In each case, the corresponding part of the where clause is in the
  * comment above the blob.
  */
@@ -76,12 +76,11 @@ public class TestSearchArgumentImpl {
   /**
    * Create a predicate leaf. This is used by another test.
    */
-  public static
-  PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
-                                    PredicateLeaf.Type type,
-                                    String columnName,
-                                    Object literal,
-                                    List<Object> literalList) {
+  public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
+                                                  PredicateLeaf.Type type,
+                                                  String columnName,
+                                                  Object literal,
+                                                  List<Object> literalList) {
     return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
         literal, literalList);
   }
@@ -134,7 +133,7 @@ public class TestSearchArgumentImpl {
         ).toString());
     assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)",
         ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)),
-            and(leaf(3),leaf(4)))).toString());
+            and(leaf(3), leaf(4)))).toString());
     assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)",
         ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3),
             leaf(4))))).toString());
@@ -143,11 +142,11 @@ public class TestSearchArgumentImpl {
             leaf(4))).toString());
     assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)",
         ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))),
-            or(or(leaf(4),leaf(5)), leaf(6)))).toString());
+            or(or(leaf(4), leaf(5)), leaf(6)))).toString());
     assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)",
         ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2),
             not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6)))
-            ).toString());
+        ).toString());
     assertEquals("(not (and leaf-1 leaf-2 leaf-3))",
         ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3))))
         ).toString());
@@ -245,20 +244,20 @@ public class TestSearchArgumentImpl {
 
   private static void assertNoSharedNodes(ExpressionTree tree,
                                           Set<ExpressionTree> seen
-                                         ) throws Exception {
+  ) throws Exception {
     if (seen.contains(tree) &&
         tree.getOperator() != ExpressionTree.Operator.LEAF) {
       assertTrue("repeated node in expression " + tree, false);
     }
     seen.add(tree);
     if (tree.getChildren() != null) {
-      for(ExpressionTree child: tree.getChildren()) {
+      for (ExpressionTree child : tree.getChildren()) {
         assertNoSharedNodes(child, seen);
       }
     }
   }
 
-  private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) {
+  private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) {
     byte[] bytes;
     try {
       bytes = xmlSerialized.getBytes("UTF-8");
@@ -275,6 +274,7 @@ public class TestSearchArgumentImpl {
       decoder.close();
     }
   }
+
   @Test
   public void testExpression1() throws Exception {
     // first_name = 'john' or
@@ -749,59 +749,85 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
+    FilterPredicate p = sarg.toFilterPredicate();
+    String[] conditions = new String[]{
+      "eq(first_name, Binary{\"john\"})",    /* first_name = 'john' */
+      "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
+      "lt(first_name, Binary{\"alan\"})",   /* 'alan' > first_name */
+      "not(lteq(id, 12))",                  /* id > 12 or */
+      "not(lteq(id, 13))",                  /* 13 < id or */
+      "lt(id, 15)",                         /* id < 15 or */
+      "lt(id, 16)",                         /* 16 > id or */
+      "eq(id, 30)",                         /* id <=> 30 */
+      "eq(first_name, Binary{\"owen\"})"    /* first_name <=> 'owen' */
+    };
+    String expected = String
+      .format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " +
+        "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions);
+    assertEquals(expected, p.toString());
+
     PredicateLeaf leaf = leaves.get(0);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("john", leaf.getLiteral());
+    assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("greg", leaf.getLiteral());
+    assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(2);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("alan", leaf.getLiteral());
+    assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(3);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12L, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(4);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(13L, leaf.getLiteral());
+    assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(5);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(15L, leaf.getLiteral());
+    assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(6);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(16L, leaf.getLiteral());
+    assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(7);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(30L, leaf.getLiteral());
+    assertEquals(30L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(30, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(8);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("owen", leaf.getLiteral());
+    assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" +
         " (not leaf-4) leaf-5 leaf-6 leaf-7)" +
@@ -1017,30 +1043,46 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(4, leaves.size());
 
+    String[] conditions = new String[]{
+      "eq(first_name, null)",               /* first_name is null  */
+      "not(eq(first_name, Binary{\"sue\"}))",    /* first_name <> 'sue' */
+      "not(lt(id, 12))",                    /* id >= 12            */
+      "lteq(id, 4)"                         /* id <= 4             */
+    };
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
+    assertEquals(expected, p.toString());
+
     PredicateLeaf leaf = leaves.get(0);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals(null, leaf.getLiteral());
-    assertEquals(null, leaf.getLiteralList());
+    assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
+    assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC));
+    assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("sue", leaf.getLiteral());
+    assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(2);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12L, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(3);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(4L, leaf.getLiteral());
+    assertEquals(4L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(4, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
         sarg.getExpression().toString());
@@ -1436,25 +1478,41 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(3, leaves.size());
 
+    String[] conditions = new String[]{
+      "lt(id, 45)",                         /* id between 23 and 45 */
+      "not(lteq(id, 23))",                   /* id between 23 and 45 */
+      "eq(first_name, Binary{\"alan\"})",   /* first_name = 'alan'  */
+      "eq(last_name, Binary{\"smith\"})"    /* 'smith' = last_name  */
+    };
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
+    assertEquals(expected, p.toString());
+
     PredicateLeaf leaf = leaves.get(0);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(null, leaf.getLiteral());
-    assertEquals(23L, leaf.getLiteralList().get(0));
-    assertEquals(45L, leaf.getLiteralList().get(1));
+    assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
+    assertEquals(23L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+    assertEquals(23, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+    assertEquals(45L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+    assertEquals(45, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("alan", leaf.getLiteral());
+    assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(2);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("last_name", leaf.getColumnName());
-    assertEquals("smith", leaf.getLiteral());
+    assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     assertEquals("(and leaf-0 leaf-1 leaf-2)",
         sarg.getExpression().toString());
@@ -1646,25 +1704,41 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(3, leaves.size());
 
+    String[] conditions = new String[]{
+      "not(eq(id, 12))", /* id <> 12 */
+      "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in
+      ('john', 'sue') */
+      "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
+    };
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
+    assertEquals(expected, p.toString());
+
     PredicateLeaf leaf = leaves.get(0);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12L, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
     assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
     assertEquals("first_name", leaf.getColumnName());
-    assertEquals("john", leaf.getLiteralList().get(0));
-    assertEquals("sue", leaf.getLiteralList().get(1));
+    assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+    assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+    assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+    assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
 
     leaf = leaves.get(2);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(34L, leaf.getLiteralList().get(0));
-    assertEquals(50L, leaf.getLiteralList().get(1));
+    assertEquals(34L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+    assertEquals(50L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+    assertEquals(34, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+    assertEquals(50, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
 
     assertEquals("(and (not leaf-0) leaf-1 leaf-2)",
         sarg.getExpression().toString());
@@ -1901,12 +1975,17 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected =
+      "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
+    assertEquals(p.toString(), expected);
+
     assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType());
     assertEquals(PredicateLeaf.Operator.BETWEEN,
         leaves.get(0).getOperator());
     assertEquals("first_name", leaves.get(0).getColumnName());
-    assertEquals("david", leaves.get(0).getLiteralList().get(0));
-    assertEquals("greg", leaves.get(0).getLiteralList().get(1));
+    assertEquals("david", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+    assertEquals("greg", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
 
     assertEquals("leaf-0",
         sarg.getExpression().toString());
@@ -2378,59 +2457,90 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 16))), " +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 17))), " +
+      "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 17)))";
+    assertEquals(p.toString(), expected);
+
     PredicateLeaf leaf = leaves.get(0);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(18L, leaf.getLiteral());
+    assertEquals(18L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(18, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(10L, leaf.getLiteral());
+    assertEquals(10L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(10, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(2);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(13L, leaf.getLiteral());
+    assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(3);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(16L, leaf.getLiteral());
+    assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(4);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(11L, leaf.getLiteral());
+    assertEquals(11L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(11, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(5);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12L, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(6);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(14L, leaf.getLiteral());
+    assertEquals(14L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(14, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(7);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(15L, leaf.getLiteral());
+    assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     leaf = leaves.get(8);
     assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(17L, leaf.getLiteral());
+    assertEquals(17L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(17, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     assertEquals("(and" +
         " (or leaf-0 leaf-1 leaf-2 leaf-3)" +
@@ -2512,6 +2622,9 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(0, leaves.size());
 
+    FilterPredicate p = sarg.toFilterPredicate();
+    assertNull(p);
+
     assertEquals("YES_NO_NULL",
         sarg.getExpression().toString());
   }
@@ -2648,115 +2761,115 @@ public class TestSearchArgumentImpl {
   public void testExpression10() throws Exception {
     /* id >= 10 and not (10 > id) */
     String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" +
-        "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n"+
-        " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n"+
-        "  <void property=\"children\"> \n"+
-        "   <object class=\"java.util.ArrayList\"> \n"+
-        "    <void method=\"add\"> \n"+
-        "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n"+
-        "      <void property=\"children\"> \n"+
-        "       <object class=\"java.util.ArrayList\"> \n"+
-        "        <void method=\"add\"> \n"+
-        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n"+
-        "          <void property=\"column\"> \n"+
-        "           <string>id</string> \n"+
-        "          </void> \n"+
-        "          <void property=\"tabAlias\"> \n"+
-        "           <string>orc_people</string> \n"+
-        "          </void> \n"+
-        "          <void property=\"typeInfo\"> \n"+
-        "           <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n"+
-        "            <void property=\"typeName\"> \n"+
-        "             <string>int</string> \n"+
-        "            </void> \n"+
-        "           </object> \n"+
-        "          </void> \n"+
-        "         </object> \n"+
-        "        </void> \n"+
-        "        <void method=\"add\"> \n"+
-        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n"+
-        "          <void property=\"typeInfo\"> \n"+
-        "           <object idref=\"PrimitiveTypeInfo0\"/> \n"+
-        "          </void> \n"+
-        "          <void property=\"value\"> \n"+
-        "           <int>10</int> \n"+
-        "          </void> \n"+
-        "         </object> \n"+
-        "        </void> \n"+
-        "       </object> \n"+
-        "      </void> \n"+
-        "      <void property=\"genericUDF\"> \n"+
-        "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan\"/> \n"+
-        "      </void> \n"+
-        "      <void property=\"typeInfo\"> \n"+
-        "       <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n"+
-        "        <void property=\"typeName\"> \n"+
-        "         <string>boolean</string> \n"+
-        "        </void> \n"+
-        "       </object> \n"+
-        "      </void> \n"+
-        "     </object> \n"+
-        "    </void> \n"+
-        "    <void method=\"add\"> \n"+
-        "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n"+
-        "      <void property=\"children\"> \n"+
-        "       <object class=\"java.util.ArrayList\"> \n"+
-        "        <void method=\"add\"> \n"+
-        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n"+
-        "          <void property=\"children\"> \n"+
-        "           <object class=\"java.util.ArrayList\"> \n"+
-        "            <void method=\"add\"> \n"+
-        "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n"+
-        "              <void property=\"column\"> \n"+
-        "               <string>id</string> \n"+
-        "              </void> \n"+
-        "              <void property=\"tabAlias\"> \n"+
-        "               <string>orc_people</string> \n"+
-        "              </void> \n"+
-        "              <void property=\"typeInfo\"> \n"+
-        "               <object idref=\"PrimitiveTypeInfo0\"/> \n"+
-        "              </void> \n"+
-        "             </object> \n"+
-        "            </void> \n"+
-        "            <void method=\"add\"> \n"+
-        "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n"+
-        "              <void property=\"typeInfo\"> \n"+
-        "               <object idref=\"PrimitiveTypeInfo0\"/> \n"+
-        "              </void> \n"+
-        "              <void property=\"value\"> \n"+
-        "               <int>10</int> \n"+
-        "              </void> \n"+
-        "             </object> \n"+
-        "            </void> \n"+
-        "           </object> \n"+
-        "          </void> \n"+
-        "          <void property=\"genericUDF\"> \n"+
-        "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n"+
-        "          </void> \n"+
-        "          <void property=\"typeInfo\"> \n"+
-        "           <object idref=\"PrimitiveTypeInfo1\"/> \n"+
-        "          </void> \n"+
-        "         </object> \n"+
-        "        </void> \n"+
-        "       </object> \n"+
-        "      </void> \n"+
-        "      <void property=\"genericUDF\"> \n"+
-        "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot\"/> \n"+
-        "      </void> \n"+
-        "      <void property=\"typeInfo\"> \n"+
-        "       <object idref=\"PrimitiveTypeInfo1\"/> \n"+
-        "      </void> \n"+
-        "     </object> \n"+
-        "    </void> \n"+
-        "   </object> \n"+
-        "  </void> \n"+
-        "  <void property=\"genericUDF\"> \n"+
-        "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n"+
-        "  </void> \n"+
-        "  <void property=\"typeInfo\"> \n"+
-        "   <object idref=\"PrimitiveTypeInfo1\"/> \n"+
-        "  </void> \n"+
-        " </object> \n"+
+        "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" +
+        " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" +
+        "  <void property=\"children\"> \n" +
+        "   <object class=\"java.util.ArrayList\"> \n" +
+        "    <void method=\"add\"> \n" +
+        "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" +
+        "      <void property=\"children\"> \n" +
+        "       <object class=\"java.util.ArrayList\"> \n" +
+        "        <void method=\"add\"> \n" +
+        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" +
+        "          <void property=\"column\"> \n" +
+        "           <string>id</string> \n" +
+        "          </void> \n" +
+        "          <void property=\"tabAlias\"> \n" +
+        "           <string>orc_people</string> \n" +
+        "          </void> \n" +
+        "          <void property=\"typeInfo\"> \n" +
+        "           <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" +
+        "            <void property=\"typeName\"> \n" +
+        "             <string>int</string> \n" +
+        "            </void> \n" +
+        "           </object> \n" +
+        "          </void> \n" +
+        "         </object> \n" +
+        "        </void> \n" +
+        "        <void method=\"add\"> \n" +
+        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" +
+        "          <void property=\"typeInfo\"> \n" +
+        "           <object idref=\"PrimitiveTypeInfo0\"/> \n" +
+        "          </void> \n" +
+        "          <void property=\"value\"> \n" +
+        "           <int>10</int> \n" +
+        "          </void> \n" +
+        "         </object> \n" +
+        "        </void> \n" +
+        "       </object> \n" +
+        "      </void> \n" +
+        "      <void property=\"genericUDF\"> \n" +
+        "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan\"/> \n" +
+        "      </void> \n" +
+        "      <void property=\"typeInfo\"> \n" +
+        "       <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" +
+        "        <void property=\"typeName\"> \n" +
+        "         <string>boolean</string> \n" +
+        "        </void> \n" +
+        "       </object> \n" +
+        "      </void> \n" +
+        "     </object> \n" +
+        "    </void> \n" +
+        "    <void method=\"add\"> \n" +
+        "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" +
+        "      <void property=\"children\"> \n" +
+        "       <object class=\"java.util.ArrayList\"> \n" +
+        "        <void method=\"add\"> \n" +
+        "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" +
+        "          <void property=\"children\"> \n" +
+        "           <object class=\"java.util.ArrayList\"> \n" +
+        "            <void method=\"add\"> \n" +
+        "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" +
+        "              <void property=\"column\"> \n" +
+        "               <string>id</string> \n" +
+        "              </void> \n" +
+        "              <void property=\"tabAlias\"> \n" +
+        "               <string>orc_people</string> \n" +
+        "              </void> \n" +
+        "              <void property=\"typeInfo\"> \n" +
+        "               <object idref=\"PrimitiveTypeInfo0\"/> \n" +
+        "              </void> \n" +
+        "             </object> \n" +
+        "            </void> \n" +
+        "            <void method=\"add\"> \n" +
+        "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" +
+        "              <void property=\"typeInfo\"> \n" +
+        "               <object idref=\"PrimitiveTypeInfo0\"/> \n" +
+        "              </void> \n" +
+        "              <void property=\"value\"> \n" +
+        "               <int>10</int> \n" +
+        "              </void> \n" +
+        "             </object> \n" +
+        "            </void> \n" +
+        "           </object> \n" +
+        "          </void> \n" +
+        "          <void property=\"genericUDF\"> \n" +
+        "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" +
+        "          </void> \n" +
+        "          <void property=\"typeInfo\"> \n" +
+        "           <object idref=\"PrimitiveTypeInfo1\"/> \n" +
+        "          </void> \n" +
+        "         </object> \n" +
+        "        </void> \n" +
+        "       </object> \n" +
+        "      </void> \n" +
+        "      <void property=\"genericUDF\"> \n" +
+        "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot\"/> \n" +
+        "      </void> \n" +
+        "      <void property=\"typeInfo\"> \n" +
+        "       <object idref=\"PrimitiveTypeInfo1\"/> \n" +
+        "      </void> \n" +
+        "     </object> \n" +
+        "    </void> \n" +
+        "   </object> \n" +
+        "  </void> \n" +
+        "  <void property=\"genericUDF\"> \n" +
+        "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" +
+        "  </void> \n" +
+        "  <void property=\"typeInfo\"> \n" +
+        "   <object idref=\"PrimitiveTypeInfo1\"/> \n" +
+        "  </void> \n" +
+        " </object> \n" +
         "</java>";
 
     SearchArgumentImpl sarg =
@@ -2764,11 +2877,16 @@ public class TestSearchArgumentImpl {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
+    assertEquals(expected, p.toString());
+
     assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN,
         leaves.get(0).getOperator());
     assertEquals("id", leaves.get(0).getColumnName());
-    assertEquals(10L, leaves.get(0).getLiteral());
+    assertEquals(10L, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.ORC));
+    assertEquals(10, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.PARQUET));
 
     assertEquals("(and (not leaf-0) (not leaf-0))",
         sarg.getExpression().toString());
@@ -2792,9 +2910,9 @@ public class TestSearchArgumentImpl {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-              .lessThan("x", 10)
-              .lessThanEquals("y", "hi")
-              .equals("z", 1.0)
+            .lessThan("x", 10)
+            .lessThanEquals("y", "hi")
+            .equals("z", 1.0)
             .end()
             .build();
     assertEquals("leaf-0 = (LESS_THAN x 10)\n" +
@@ -2803,12 +2921,12 @@ public class TestSearchArgumentImpl {
         "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
-           .startOr()
-             .isNull("x")
-             .between("y", 10, 20)
-             .in("z", 1, 2, 3)
-             .nullSafeEquals("a", "stinger")
-           .end()
+        .startOr()
+        .isNull("x")
+        .between("y", 10, 20)
+        .in("z", 1, 2, 3)
+        .nullSafeEquals("a", "stinger")
+        .end()
         .end()
         .build();
     assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2816,6 +2934,12 @@ public class TestSearchArgumentImpl {
         "leaf-2 = (IN z 1 2 3)\n" +
         "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
         "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected =
+      "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
+        "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
+    assertEquals(expected, p.toString());
   }
 
   @Test
@@ -2823,24 +2947,25 @@ public class TestSearchArgumentImpl {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-              .lessThan("x", new DateWritable(10))
-              .lessThanEquals("y", new HiveChar("hi", 10))
-              .equals("z", HiveDecimal.create("1.0"))
+            .lessThan("x", new DateWritable(10))
+            .lessThanEquals("y", new HiveChar("hi", 10))
+            .equals("z", HiveDecimal.create("1.0"))
             .end()
             .build();
     assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" +
         "leaf-1 = (LESS_THAN_EQUALS y hi)\n" +
         "leaf-2 = (EQUALS z 1)\n" +
         "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
+    assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
-           .startOr()
-             .isNull("x")
-             .between("y", HiveDecimal.create(10), 20.0)
-             .in("z", (byte)1, (short)2, (int)3)
-             .nullSafeEquals("a", new HiveVarchar("stinger", 100))
-           .end()
+        .startOr()
+        .isNull("x")
+        .between("y", HiveDecimal.create(10), 20.0)
+        .in("z", (byte) 1, (short) 2, (int) 3)
+        .nullSafeEquals("a", new HiveVarchar("stinger", 100))
+        .end()
         .end()
         .build();
     assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2848,6 +2973,11 @@ public class TestSearchArgumentImpl {
         "leaf-2 = (IN z 1 2 3)\n" +
         "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
         "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+        "not(eq(a, Binary{\"stinger\"})))";
+    assertEquals(expected, p.toString());
   }
 
   @Test
@@ -2864,13 +2994,14 @@ public class TestSearchArgumentImpl {
         "leaf-1 = (LESS_THAN_EQUALS y hi)\n" +
         "leaf-2 = (EQUALS z 1.0)\n" +
         "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
+    assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
         .isNull("x")
         .between("y", new BigDecimal(10), 20.0)
-        .in("z", (byte)1, (short)2, (int)3)
+        .in("z", (byte) 1, (short) 2, (int) 3)
         .nullSafeEquals("a", new HiveVarchar("stinger", 100))
         .end()
         .end()
@@ -2880,6 +3011,11 @@ public class TestSearchArgumentImpl {
         "leaf-2 = (IN z 1 2 3)\n" +
         "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
         "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+        "not(eq(a, Binary{\"stinger\"})))";
+    assertEquals(expected, p.toString());
   }
 
   @Test
@@ -2900,5 +3036,10 @@ public class TestSearchArgumentImpl {
         "leaf-3 = (EQUALS z 0.22)\n" +
         "leaf-4 = (EQUALS z1 0.22)\n" +
         "expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString());
+
+    FilterPredicate p = sarg.toFilterPredicate();
+    String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " +
+        "0.22)), eq(z1, 0.22))";
+    assertEquals(expected, p.toString());
   }
 }

Modified: hive/trunk/serde/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/serde/pom.xml?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/serde/pom.xml (original)
+++ hive/trunk/serde/pom.xml Tue Nov 18 18:53:53 2014
@@ -75,6 +75,11 @@
       <artifactId>opencsv</artifactId>
       <version>${opencsv.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>parquet-hadoop-bundle</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
 
       <!-- test inter-project -->
     <dependency>

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java Tue Nov 18 18:53:53 2014
@@ -43,7 +43,8 @@ public interface PredicateLeaf {
    * The possible types for sargs.
    */
   public static enum Type {
-    INTEGER, // all of the integer types
+    INTEGER, // all of the integer types except long
+    LONG,
     FLOAT,   // float and double
     STRING,  // string, char, varchar
     DATE,
@@ -53,12 +54,20 @@ public interface PredicateLeaf {
   }
 
   /**
+   * The file formats that support search arguments.
+   */
+  public static enum FileFormat {
+    ORC,
+    PARQUET
+  }
+
+  /**
    * Get the operator for the leaf.
    */
   public Operator getOperator();
 
   /**
-   * Get the type of the column and literal.
+   * Get the type of the column and literal by the file format.
    */
   public Type getType();
 
@@ -69,14 +78,17 @@ public interface PredicateLeaf {
   public String getColumnName();
 
   /**
-   * Get the literal half of the predicate leaf.
-   * @return a Long, Double, or String
+   * Get the literal half of the predicate leaf, adapting the original type to what the
+   * given file format needs.
+   * @return a Long, Double, or String for ORC, or an Integer, Long, Double, or String for Parquet
    */
-  public Object getLiteral();
+  public Object getLiteral(FileFormat format);
 
   /**
    * For operators with multiple literals (IN and BETWEEN), get the literals.
-   * @return the list of literals (Longs, Doubles, or Strings)
+   *
+   * @return the list of literals (Longs, Doubles, or Strings) for ORC, or the list of
+   * literals (Integers, Longs, Doubles, or Strings) for Parquet
    */
-  public List<Object> getLiteralList();
+  public List<Object> getLiteralList(FileFormat format);
+
 }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1640409&r1=1640408&r2=1640409&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Tue Nov 18 18:53:53 2014
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
+import parquet.filter2.predicate.FilterPredicate;
+
 import java.util.List;
 
 /**
@@ -175,6 +177,12 @@ public interface SearchArgument {
   public String toKryo();
 
   /**
+   * Translate the search argument into the FilterPredicate form used by Parquet.
+   * @return the equivalent Parquet FilterPredicate
+   */
+  public FilterPredicate toFilterPredicate();
+
+  /**
    * A builder object for contexts outside of Hive where it isn't easy to
    * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot
    * before adding any leaves.



Mime
View raw message