parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [2/3] parquet-mr git commit: PARQUET-1025: Support new min-max statistics in parquet-mr
Date Sat, 13 Jan 2018 00:29:54 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 8056188..2d7491f 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -22,11 +22,14 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.ShouldNeverHappenException;
 import org.apache.parquet.column.ColumnReader;
 import org.apache.parquet.io.InvalidRecordException;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
 
 
 /**
@@ -86,6 +89,26 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertINT64(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        if (logicalType == null) {
+          return PrimitiveComparator.SIGNED_INT64_COMPARATOR;
+        }
+        switch (logicalType) {
+        case UINT_64:
+          return PrimitiveComparator.UNSIGNED_INT64_COMPARATOR;
+        case INT_64:
+        case DECIMAL:
+        case TIME_MICROS:
+        case TIMESTAMP_MILLIS:
+        case TIMESTAMP_MICROS:
+          return PrimitiveComparator.SIGNED_INT64_COMPARATOR;
+        default:
+          throw new ShouldNeverHappenException(
+              "No comparator logic implemented for INT64 logical type: " + logicalType);
+        }
+      }
     },
     INT32("getInteger", Integer.TYPE) {
       @Override
@@ -109,6 +132,29 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertINT32(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        if (logicalType == null) {
+          return PrimitiveComparator.SIGNED_INT32_COMPARATOR;
+        }
+        switch (logicalType) {
+        case UINT_8:
+        case UINT_16:
+        case UINT_32:
+          return PrimitiveComparator.UNSIGNED_INT32_COMPARATOR;
+        case INT_8:
+        case INT_16:
+        case INT_32:
+        case DECIMAL:
+        case DATE:
+        case TIME_MILLIS:
+          return PrimitiveComparator.SIGNED_INT32_COMPARATOR;
+        default:
+          throw new ShouldNeverHappenException(
+              "No comparator logic implemented for INT32 logical type: " + logicalType);
+        }
+      }
     },
     BOOLEAN("getBoolean", Boolean.TYPE) {
       @Override
@@ -132,6 +178,11 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertBOOLEAN(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        return PrimitiveComparator.BOOLEAN_COMPARATOR;
+      }
     },
     BINARY("getBinary", Binary.class) {
       @Override
@@ -155,6 +206,25 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertBINARY(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        if (logicalType == null) {
+          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
+        }
+        switch (logicalType) {
+        case DECIMAL:
+          return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
+        case UTF8:
+        case ENUM:
+        case JSON:
+        case BSON:
+          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
+        default:
+          throw new ShouldNeverHappenException(
+              "No comparator logic implemented for BINARY logical type: " + logicalType);
+        }
+      }
     },
     FLOAT("getFloat", Float.TYPE) {
       @Override
@@ -178,6 +248,11 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertFLOAT(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        return PrimitiveComparator.FLOAT_COMPARATOR;
+      }
     },
     DOUBLE("getDouble", Double.TYPE) {
       @Override
@@ -201,6 +276,11 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertDOUBLE(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        return PrimitiveComparator.DOUBLE_COMPARATOR;
+      }
     },
     INT96("getBinary", Binary.class) {
       @Override
@@ -222,6 +302,11 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertINT96(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
+      }
     },
     FIXED_LEN_BYTE_ARRAY("getBinary", Binary.class) {
       @Override
@@ -245,6 +330,22 @@ public final class PrimitiveType extends Type {
       public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
         return converter.convertFIXED_LEN_BYTE_ARRAY(this);
       }
+
+      @Override
+      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+        if (logicalType == null) {
+          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
+        }
+        switch (logicalType) {
+        case DECIMAL:
+          return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
+        case INTERVAL:
+          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
+        default:
+          throw new ShouldNeverHappenException(
+              "No comparator logic implemented for FIXED_LEN_BYTE_ARRAY logical type: " + logicalType);
+        }
+      }
     };
 
     public final String getMethod;
@@ -275,11 +376,14 @@ public final class PrimitiveType extends Type {
 
     abstract public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E;
 
+    abstract PrimitiveComparator<?> comparator(OriginalType logicalType);
+
   }
 
   private final PrimitiveTypeName primitive;
   private final int length;
   private final DecimalMetadata decimalMeta;
+  private final ColumnOrder columnOrder;
 
   /**
    * @param repetition OPTIONAL, REPEATED, REQUIRED
@@ -337,10 +441,61 @@ public final class PrimitiveType extends Type {
   public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
                        int length, String name, OriginalType originalType,
                        DecimalMetadata decimalMeta, ID id) {
+    this(repetition, primitive, length, name, originalType, decimalMeta, id, null);
+  }
+
+  PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+      int length, String name, OriginalType originalType,
+      DecimalMetadata decimalMeta, ID id, ColumnOrder columnOrder) {
     super(name, repetition, originalType, id);
     this.primitive = primitive;
     this.length = length;
     this.decimalMeta = decimalMeta;
+
+    if (columnOrder == null) {
+      columnOrder = primitive == PrimitiveTypeName.INT96 || originalType == OriginalType.INTERVAL
+          ? ColumnOrder.undefined()
+          : ColumnOrder.typeDefined();
+    }
+    this.columnOrder = requireValidColumnOrder(columnOrder);
+  }
+
+  private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) {
+    if (primitive == PrimitiveTypeName.INT96) {
+      Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED,
+          "The column order {} is not supported by INT96", columnOrder);
+    }
+    if (getOriginalType() != null) {
+      // Explicitly listing all the logical types to avoid accidentally allowing unsupported column orders for new types
+      switch (getOriginalType()) {
+        case INT_8:
+        case INT_16:
+        case INT_32:
+        case INT_64:
+        case UINT_8:
+        case UINT_16:
+        case UINT_32:
+        case UINT_64:
+        case UTF8:
+        case DECIMAL:
+        case DATE:
+        case TIME_MILLIS:
+        case TIME_MICROS:
+        case TIMESTAMP_MILLIS:
+        case TIMESTAMP_MICROS:
+        case ENUM:
+        case JSON:
+        case BSON:
+          // Currently any available column order is valid
+          break;
+        case INTERVAL:
+        default:
+          Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED,
+              "The column order {} is not supported by {} ({})", columnOrder, primitive, getOriginalType());
+          break;
+      }
+    }
+    return columnOrder;
   }
 
   /**
@@ -349,7 +504,8 @@ public final class PrimitiveType extends Type {
    */
   @Override
   public PrimitiveType withId(int id) {
-    return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id));
+    return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id),
+        columnOrder);
   }
 
   /**
@@ -441,6 +597,7 @@ public final class PrimitiveType extends Type {
     return super.equals(other)
         && primitive == otherPrimitive.getPrimitiveTypeName()
         && length == otherPrimitive.length
+        && columnOrder.equals(otherPrimitive.columnOrder)
         && eqOrBothNull(decimalMeta, otherPrimitive.decimalMeta);
   }
 
@@ -452,6 +609,7 @@ public final class PrimitiveType extends Type {
     int hash = super.hashCode();
     hash = hash * 31 + primitive.hashCode();
     hash = hash * 31 + length;
+    hash = hash * 31 + columnOrder.hashCode();
     if (decimalMeta != null) {
       hash = hash * 31 + decimalMeta.hashCode();
     }
@@ -519,6 +677,11 @@ public final class PrimitiveType extends Type {
     throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " into " + this);
   }
 
+  private void reportSchemaMergeErrorWithColumnOrder(Type toMerge) {
+    throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " with column order "
+        + toMerge.asPrimitiveType().columnOrder() + " into " + this + " with column order " + columnOrder());
+  }
+
   @Override
   protected Type union(Type toMerge, boolean strict) {
     if (!toMerge.isPrimitive()) {
@@ -537,6 +700,11 @@ public final class PrimitiveType extends Type {
       if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY && length != toMergeLength) {
         reportSchemaMergeError(toMerge);
       }
+
+      // Can't merge primitive fields with different column orders
+      if (!columnOrder().equals(toMerge.asPrimitiveType().columnOrder())) {
+        reportSchemaMergeErrorWithColumnOrder(toMerge);
+      }
     }
 
     Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(primitive, toMerge.getRepetition());
@@ -547,4 +715,21 @@ public final class PrimitiveType extends Type {
 
     return builder.as(getOriginalType()).named(getName());
   }
+
+  /**
+   * Returns the {@link Type} specific comparator for properly comparing values. The natural ordering of the values
+   * might not be proper in certain cases (e.g. {@code UINT_32} requires unsigned comparison of {@code int} values while
+   * the natural ordering is signed.)
+   */
+  @SuppressWarnings("unchecked")
+  public <T> PrimitiveComparator<T> comparator() {
+    return (PrimitiveComparator<T>) getPrimitiveTypeName().comparator(getOriginalType());
+  }
+
+  /**
+   * @return the column order for this type
+   */
+  public ColumnOrder columnOrder() {
+    return columnOrder;
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index e81daae..0422a9d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.parquet.Preconditions;
+import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type.ID;
 import org.slf4j.Logger;
@@ -316,6 +317,7 @@ public class Types {
     private int length = NOT_SET;
     private int precision = NOT_SET;
     private int scale = NOT_SET;
+    private ColumnOrder columnOrder;
 
     private BasePrimitiveBuilder(P parent, PrimitiveTypeName type) {
       super(parent);
@@ -374,6 +376,22 @@ public class Types {
       return self();
     }
 
+    /**
+     * Adds the column order for the primitive type.
+     * <p>
+     * If not set, the default column order is {@link ColumnOrderName#TYPE_DEFINED_ORDER}, except for the type
+     * {@link PrimitiveTypeName#INT96} and the types annotated by {@link OriginalType#INTERVAL} where the default column
+     * order is {@link ColumnOrderName#UNDEFINED}.
+     *
+     * @param columnOrder
+     *          the column order for the primitive type
+     * @return this builder for method chaining
+     */
+    public THIS columnOrder(ColumnOrder columnOrder) {
+      this.columnOrder = columnOrder;
+      return self();
+    }
+
     @Override
     protected PrimitiveType build(String name) {
       if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitiveType) {
@@ -457,7 +475,7 @@ public class Types {
         }
       }
 
-      return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id);
+      return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id, columnOrder);
     }
 
     private static long maxPrecision(int numBytes) {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
index 690c7e1..476fbb3 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
@@ -74,6 +74,13 @@ public class TestStatistics {
     assertEquals(statsNeg.getMax(), 54);
     assertEquals(statsNeg.getMin(), -66);
 
+    assertTrue(statsNeg.compareMaxToValue(55) < 0);
+    assertTrue(statsNeg.compareMaxToValue(54) == 0);
+    assertTrue(statsNeg.compareMaxToValue(5) > 0);
+    assertTrue(statsNeg.compareMinToValue(0) < 0);
+    assertTrue(statsNeg.compareMinToValue(-66) == 0);
+    assertTrue(statsNeg.compareMinToValue(-67) > 0);
+
     // Test converting to and from byte[]
     byte[] intMaxBytes = statsNeg.getMaxBytes();
     byte[] intMinBytes = statsNeg.getMinBytes();
@@ -135,6 +142,13 @@ public class TestStatistics {
     assertEquals(statsNeg.getMax(), 993);
     assertEquals(statsNeg.getMin(), -9914);
 
+    assertTrue(statsNeg.compareMaxToValue(994) < 0);
+    assertTrue(statsNeg.compareMaxToValue(993) == 0);
+    assertTrue(statsNeg.compareMaxToValue(-1000) > 0);
+    assertTrue(statsNeg.compareMinToValue(10000) < 0);
+    assertTrue(statsNeg.compareMinToValue(-9914) == 0);
+    assertTrue(statsNeg.compareMinToValue(-9915) > 0);
+
     // Test converting to and from byte[]
     byte[] longMaxBytes = statsNeg.getMaxBytes();
     byte[] longMinBytes = statsNeg.getMinBytes();
@@ -196,6 +210,13 @@ public class TestStatistics {
     assertEquals(statsNeg.getMax(), 0.65f, 1e-10);
     assertEquals(statsNeg.getMin(), -412.99f, 1e-10);
 
+    assertTrue(statsNeg.compareMaxToValue(1) < 0);
+    assertTrue(statsNeg.compareMaxToValue(0.65F) == 0);
+    assertTrue(statsNeg.compareMaxToValue(0.649F) > 0);
+    assertTrue(statsNeg.compareMinToValue(-412.98F) < 0);
+    assertTrue(statsNeg.compareMinToValue(-412.99F) == 0);
+    assertTrue(statsNeg.compareMinToValue(-450) > 0);
+
     // Test converting to and from byte[]
     byte[] floatMaxBytes = statsNeg.getMaxBytes();
     byte[] floatMinBytes = statsNeg.getMinBytes();
@@ -257,6 +278,13 @@ public class TestStatistics {
     assertEquals(statsNeg.getMax(), 23.0d, 1e-10);
     assertEquals(statsNeg.getMin(), -944.5d, 1e-10);
 
+    assertTrue(statsNeg.compareMaxToValue(23.0001D) < 0);
+    assertTrue(statsNeg.compareMaxToValue(23D) == 0);
+    assertTrue(statsNeg.compareMaxToValue(0D) > 0);
+    assertTrue(statsNeg.compareMinToValue(-400D) < 0);
+    assertTrue(statsNeg.compareMinToValue(-944.5D) == 0);
+    assertTrue(statsNeg.compareMinToValue(-944.500001D) > 0);
+
     // Test converting to and from byte[]
     byte[] doubleMaxBytes = statsNeg.getMaxBytes();
     byte[] doubleMinBytes = statsNeg.getMinBytes();

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
index a541e1b..0815597 100644
--- a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
@@ -248,4 +248,24 @@ public class TestBinary {
 
     testSerializable(bf, reused);
   }
+
+  @Test
+  public void testCompare() {
+    Binary b1 = Binary.fromCharSequence("aaaaaaaa");
+    Binary b2 = Binary.fromString("aaaaaaab");
+    Binary b3 = Binary.fromReusedByteArray("aaaaaaaaaaa".getBytes(), 1, 8);
+    Binary b4 = Binary.fromConstantByteBuffer(ByteBuffer.wrap("aaaaaaac".getBytes()));
+
+    assertTrue(b1.compareTo(b2) < 0);
+    assertTrue(b2.compareTo(b1) > 0);
+    assertTrue(b3.compareTo(b4) < 0);
+    assertTrue(b4.compareTo(b3) > 0);
+    assertTrue(b1.compareTo(b4) < 0);
+    assertTrue(b4.compareTo(b1) > 0);
+    assertTrue(b2.compareTo(b4) < 0);
+    assertTrue(b4.compareTo(b2) > 0);
+
+    assertTrue(b1.compareTo(b3) == 0);
+    assertTrue(b3.compareTo(b1) == 0);
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
index 4add174..0561938 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
@@ -21,9 +21,12 @@ package org.apache.parquet.schema;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
 import static org.apache.parquet.schema.OriginalType.LIST;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
@@ -189,6 +192,48 @@ public class TestMessageType {
   }
 
   @Test
+  public void testMergeSchemaWithColumnOrder() {
+    MessageType m1 = Types.buildMessage().addFields(
+        Types.requiredList().element(
+            Types.optional(BINARY).columnOrder(ColumnOrder.undefined()).named("a")
+            ).named("g"),
+        Types.optional(INT96).named("b")
+        ).named("root");
+    MessageType m2 = Types.buildMessage().addFields(
+        Types.requiredList().element(
+            Types.optional(BINARY).columnOrder(ColumnOrder.undefined()).named("a")
+            ).named("g"),
+        Types.optional(BINARY).named("c")
+        ).named("root");
+    MessageType m3 = Types.buildMessage().addFields(
+        Types.requiredList().element(
+            Types.optional(BINARY).named("a")
+            ).named("g")
+        ).named("root");
+
+    assertEquals(
+        Types.buildMessage().addFields(
+            Types.requiredList().element(
+                Types.optional(BINARY).named("a")
+                ).named("g"),
+            Types.optional(INT96).named("b"),
+            Types.optional(BINARY).named("c")
+            ).named("root"),
+        m1.union(m2));
+    try {
+      m1.union(m3);
+      fail("An IncompatibleSchemaModificationException should have been thrown");
+    } catch (Exception e) {
+      assertTrue(
+          "The thrown exception should have been IncompatibleSchemaModificationException but was " + e.getClass(),
+          e instanceof IncompatibleSchemaModificationException);
+      assertEquals(
+          "can not merge type optional binary a with column order TYPE_DEFINED_ORDER into optional binary a with column order UNDEFINED",
+          e.getMessage());
+    }
+  }
+
+  @Test
   public void testIDs() throws Exception {
     MessageType schema = new MessageType("test",
         new PrimitiveType(REQUIRED, BINARY, "foo").withId(4),

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
new file mode 100644
index 0000000..3f9d643
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import org.apache.parquet.io.api.Binary;
+import org.junit.Test;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+
+import static org.apache.parquet.schema.PrimitiveComparator.BOOLEAN_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.DOUBLE_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.FLOAT_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.SIGNED_INT32_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.SIGNED_INT64_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.UNSIGNED_INT32_COMPARATOR;
+import static org.apache.parquet.schema.PrimitiveComparator.UNSIGNED_INT64_COMPARATOR;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/*
+ * This test verifies all the PrimitiveComparator implementations. The logic of all tests is the same: list the
+ * elements to be tested in ascending order and then compare every elements to each other (including the element
+ * itself) and expect the related value based on the defined order.
+ */
+public class TestPrimitiveComparator {
+
+  @Test
+  public void testBooleanComparator() {
+    Boolean[] valuesInAscendingOrder = { null, false, true };
+
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        Boolean vi = valuesInAscendingOrder[i];
+        Boolean vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, BOOLEAN_COMPARATOR.compare(vi, vj));
+        if (vi != null && vj != null) {
+          assertSignumEquals(vi, vj, exp, BOOLEAN_COMPARATOR.compare(vi.booleanValue(), vj.booleanValue()));
+        }
+      }
+    }
+
+    checkThrowingUnsupportedException(BOOLEAN_COMPARATOR, Boolean.TYPE);
+  }
+
+  @Test
+  public void testSignedInt32Comparator() {
+    testInt32Comparator(SIGNED_INT32_COMPARATOR,
+        null,
+        Integer.MIN_VALUE,
+        -12345,
+        -1,
+        0,
+        1,
+        12345,
+        Integer.MAX_VALUE);
+  }
+
+  @Test
+  public void testUnsignedInt32Comparator() {
+    testInt32Comparator(UNSIGNED_INT32_COMPARATOR,
+        null,
+        0,                  // 0x00000000
+        1,                  // 0x00000001
+        12345,              // 0x00003039
+        Integer.MAX_VALUE,  // 0x7FFFFFFF
+        Integer.MIN_VALUE,  // 0x80000000
+        -12345,             // 0xFFFFCFC7
+        -1);                // 0xFFFFFFFF
+  }
+
+  private void testInt32Comparator(PrimitiveComparator<Integer> comparator, Integer... valuesInAscendingOrder) {
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        Integer vi = valuesInAscendingOrder[i];
+        Integer vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, comparator.compare(vi, vj));
+        if (vi != null && vj != null) {
+          assertSignumEquals(vi, vj, exp, comparator.compare(vi.intValue(), vj.intValue()));
+        }
+      }
+    }
+
+    checkThrowingUnsupportedException(comparator, Integer.TYPE);
+  }
+
+  @Test
+  public void testSignedInt64Comparator() {
+    testInt64Comparator(SIGNED_INT64_COMPARATOR,
+        null,
+        Long.MIN_VALUE,
+        -12345678901L,
+        -1L,
+        0L,
+        1L,
+        12345678901L,
+        Long.MAX_VALUE);
+  }
+
+  @Test
+  public void testUnsignedInt64Comparator() {
+    testInt64Comparator(UNSIGNED_INT64_COMPARATOR,
+        null,
+        0L,              // 0x0000000000000000
+        1L,              // 0x0000000000000001
+        12345678901L,    // 0x00000002DFDC1C35
+        Long.MAX_VALUE,  // 0x7FFFFFFFFFFFFFFF
+        Long.MIN_VALUE,  // 0x8000000000000000
+        -12345678901L,   // 0xFFFFFFFD2023E3CB
+        -1L);            // 0xFFFFFFFFFFFFFFFF
+  }
+
+  private void testInt64Comparator(PrimitiveComparator<Long> comparator, Long... valuesInAscendingOrder) {
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        Long vi = valuesInAscendingOrder[i];
+        Long vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, comparator.compare(vi, vj));
+        if (vi != null && vj != null) {
+          assertSignumEquals(vi, vj, exp, comparator.compare(vi.longValue(), vj.longValue()));
+        }
+      }
+    }
+
+    checkThrowingUnsupportedException(comparator, Long.TYPE);
+  }
+
+  @Test
+  public void testFloatComparator() {
+    Float[] valuesInAscendingOrder = {
+        null,
+        Float.NEGATIVE_INFINITY,
+        -Float.MAX_VALUE,
+        -1234.5678F,
+        -Float.MIN_VALUE,
+        0.0F,
+        Float.MIN_VALUE,
+        1234.5678F,
+        Float.MAX_VALUE,
+        Float.POSITIVE_INFINITY };
+
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        Float vi = valuesInAscendingOrder[i];
+        Float vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, FLOAT_COMPARATOR.compare(vi, vj));
+        if (vi != null && vj != null) {
+          assertSignumEquals(vi, vj, exp, FLOAT_COMPARATOR.compare(vi.floatValue(), vj.floatValue()));
+        }
+      }
+    }
+
+    checkThrowingUnsupportedException(FLOAT_COMPARATOR, Float.TYPE);
+  }
+
+  @Test
+  public void testDoubleComparator() {
+    Double[] valuesInAscendingOrder = {
+        null,
+        Double.NEGATIVE_INFINITY,
+        -Double.MAX_VALUE,
+        -123456.7890123456789,
+        -Double.MIN_VALUE,
+        0.0,
+        Double.MIN_VALUE,
+        123456.7890123456789,
+        Double.MAX_VALUE,
+        Double.POSITIVE_INFINITY };
+
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        Double vi = valuesInAscendingOrder[i];
+        Double vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, DOUBLE_COMPARATOR.compare(vi, vj));
+        if (vi != null && vj != null) {
+          assertSignumEquals(vi, vj, exp, DOUBLE_COMPARATOR.compare(vi.doubleValue(), vj.doubleValue()));
+        }
+      }
+    }
+
+    checkThrowingUnsupportedException(DOUBLE_COMPARATOR, Double.TYPE);
+  }
+
+  @Test
+  public void testLexicographicalBinaryComparator() {
+    testObjectComparator(UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR,
+        null,
+        Binary.fromConstantByteArray(new byte[0]),                                           // ||
+        Binary.fromConstantByteArray(new byte[] { 127, 127, 0, 127 }, 2, 1),                 // |00|
+        Binary.fromCharSequence("aaa"),                                                      // |61|61|61|
+        Binary.fromString("aaaa"),                                                           // |61|61|61|61|
+        Binary.fromReusedByteArray("aaab".getBytes()),                                       // |61|61|61|62|
+        Binary.fromReusedByteArray("azzza".getBytes(), 1, 3),                                // |7A|7A|7A|
+        Binary.fromReusedByteBuffer(ByteBuffer.wrap("zzzzzz".getBytes())),                   // |7A|7A|7A|7A|7A|7A|
+        Binary.fromReusedByteBuffer(ByteBuffer.wrap("aazzzzzzaa".getBytes(), 2, 7)),         // |7A|7A|7A|7A|7A|7A|61|
+        Binary.fromConstantByteBuffer(ByteBuffer.wrap(new byte[] { -128, -128, -128 })),     // |80|80|80|
+        Binary.fromConstantByteBuffer(ByteBuffer.wrap(new byte[] { -128, -128, -1 }, 1, 2))  // |80|FF|
+    );
+  }
+
+  @Test
+  public void testBinaryAsSignedIntegerComparator() {
+    testObjectComparator(BINARY_AS_SIGNED_INTEGER_COMPARATOR,
+        null,
+        Binary.fromConstantByteArray(new BigInteger("-9999999999999999999999999999999999999999").toByteArray()),
+        Binary.fromReusedByteArray(new BigInteger("-9999999999999999999999999999999999999998").toByteArray()),
+        Binary.fromConstantByteArray(BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE).toByteArray()),
+        Binary.fromConstantByteArray(BigInteger.valueOf(Long.MIN_VALUE).toByteArray()),
+        Binary.fromConstantByteArray(BigInteger.valueOf(Long.MIN_VALUE).add(BigInteger.ONE).toByteArray()),
+        Binary.fromReusedByteArray(new byte[] { (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, -2 }, 1, 3),
+        Binary.fromReusedByteArray(new BigInteger("-1").toByteArray()),
+        Binary.fromConstantByteBuffer(ByteBuffer.wrap(new BigInteger("0").toByteArray())),
+        Binary.fromReusedByteBuffer(ByteBuffer.wrap(new byte[] { 0, 0, 0, 1 })),
+        Binary.fromConstantByteBuffer(ByteBuffer.wrap(new byte[] { 0, 0, 0, 2 }), 2, 2),
+        Binary.fromConstantByteBuffer(
+            ByteBuffer.wrap(BigInteger.valueOf(Long.MAX_VALUE).subtract(BigInteger.ONE).toByteArray())),
+        Binary.fromConstantByteBuffer(ByteBuffer.wrap(BigInteger.valueOf(Long.MAX_VALUE).toByteArray())),
+        Binary
+            .fromConstantByteBuffer(
+                ByteBuffer.wrap(BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE).toByteArray())),
+        Binary.fromConstantByteBuffer(
+            ByteBuffer.wrap(new BigInteger("999999999999999999999999999999999999999").toByteArray())),
+        Binary.fromReusedByteBuffer(
+            ByteBuffer.wrap(new BigInteger("9999999999999999999999999999999999999998").toByteArray())),
+        Binary.fromConstantByteBuffer(
+            ByteBuffer.wrap(new BigInteger("9999999999999999999999999999999999999999").toByteArray())));
+  }
+
+  private <T> void testObjectComparator(PrimitiveComparator<T> comparator, T... valuesInAscendingOrder) {
+    for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
+      for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
+        T vi = valuesInAscendingOrder[i];
+        T vj = valuesInAscendingOrder[j];
+        int exp = i - j;
+        assertSignumEquals(vi, vj, exp, comparator.compare(vi, vj));
+      }
+    }
+
+    checkThrowingUnsupportedException(comparator, null);
+  }
+
+  private <T> void assertSignumEquals(T v1, T v2, int expected, int actual) {
+    String sign = expected < 0 ? " < " : expected > 0 ? " > " : " = ";
+    assertEquals("expected: " + v1 + sign + v2, signum(expected), signum(actual));
+  }
+
+  private int signum(int i) {
+    return i < 0 ? -1 : i > 0 ? 1 : 0;
+  }
+
+  private void checkThrowingUnsupportedException(PrimitiveComparator<?> comparator, Class<?> exclude) {
+    if (Integer.TYPE != exclude) {
+      try {
+        comparator.compare(0, 0);
+        fail("An UnsupportedOperationException should have been thrown");
+      } catch (UnsupportedOperationException e) {
+      }
+    }
+    if (Long.TYPE != exclude) {
+      try {
+        comparator.compare(0L, 0L);
+        fail("An UnsupportedOperationException should have been thrown");
+      } catch (UnsupportedOperationException e) {
+      }
+    }
+    if (Float.TYPE != exclude) {
+      try {
+        comparator.compare(0.0F, 0.0F);
+        fail("An UnsupportedOperationException should have been thrown");
+      } catch (UnsupportedOperationException e) {
+      }
+    }
+    if (Double.TYPE != exclude) {
+      try {
+        comparator.compare(0.0, 0.0);
+        fail("An UnsupportedOperationException should have been thrown");
+      } catch (UnsupportedOperationException e) {
+      }
+    }
+    if (Boolean.TYPE != exclude) {
+      try {
+        comparator.compare(false, false);
+        fail("An UnsupportedOperationException should have been thrown");
+      } catch (UnsupportedOperationException e) {
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 0c39ef2..0b1f41a 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -24,6 +24,7 @@ import java.util.concurrent.Callable;
 import org.junit.Assert;
 import org.junit.Test;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.Repetition;
 
 import static org.apache.parquet.schema.OriginalType.*;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
@@ -1348,6 +1349,52 @@ public class TestTypeBuilders {
     Assert.assertEquals(expected, actual);
   }
 
+  @Test
+  public void testTypeConstructionWithUndefinedColumnOrder() {
+    PrimitiveTypeName[] types = new PrimitiveTypeName[] {
+        BOOLEAN, INT32, INT64, INT96, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
+    };
+    for (PrimitiveTypeName type : types) {
+      String name = type.toString() + "_";
+      int len = type == FIXED_LEN_BYTE_ARRAY ? 42 : 0;
+      PrimitiveType expected = new PrimitiveType(Repetition.OPTIONAL, type, len, name, null, null, null,
+          ColumnOrder.undefined());
+      PrimitiveType built = Types.optional(type).length(len).columnOrder(ColumnOrder.undefined()).named(name);
+      Assert.assertEquals(expected, built);
+    }
+  }
+
+  @Test
+  public void testTypeConstructionWithTypeDefinedColumnOrder() {
+    PrimitiveTypeName[] types = new PrimitiveTypeName[] {
+        BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
+    };
+    for (PrimitiveTypeName type : types) {
+      String name = type.toString() + "_";
+      int len = type == FIXED_LEN_BYTE_ARRAY ? 42 : 0;
+      PrimitiveType expected = new PrimitiveType(Repetition.OPTIONAL, type, len, name, null, null, null,
+          ColumnOrder.typeDefined());
+      PrimitiveType built = Types.optional(type).length(len).columnOrder(ColumnOrder.typeDefined()).named(name);
+      Assert.assertEquals(expected, built);
+    }
+  }
+
+  @Test
+  public void testTypeConstructionWithUnsupportedColumnOrder() {
+    assertThrows(null, IllegalArgumentException.class, new Callable<PrimitiveType>() {
+      @Override
+      public PrimitiveType call() {
+        return Types.optional(INT96).columnOrder(ColumnOrder.typeDefined()).named("int96_unsupported");
+      }
+    });
+    assertThrows(null, IllegalArgumentException.class, new Callable<PrimitiveType>() {
+      @Override
+      public PrimitiveType call() {
+        return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL)
+            .columnOrder(ColumnOrder.typeDefined()).named("interval_unsupported");
+      }
+    });
+  }
 
   /**
    * A convenience method to avoid a large number of @Test(expected=...) tests

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
----------------------------------------------------------------------
diff --git a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
index 1dfaf6f..fc5413e 100644
--- a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
+++ b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
@@ -45,29 +45,29 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
   private static class TypeInfo {
     public final String className;
     public final String primitiveName;
-    public final boolean useComparable;
     public final boolean supportsInequality;
 
-    private TypeInfo(String className, String primitiveName, boolean useComparable, boolean supportsInequality) {
+    private TypeInfo(String className, String primitiveName, boolean supportsInequality) {
       this.className = className;
       this.primitiveName = primitiveName;
-      this.useComparable = useComparable;
       this.supportsInequality = supportsInequality;
     }
   }
 
   private static final TypeInfo[] TYPES = new TypeInfo[]{
-    new TypeInfo("Integer", "int", false, true),
-    new TypeInfo("Long", "long", false, true),
-    new TypeInfo("Boolean", "boolean", false, false),
-    new TypeInfo("Float", "float", false, true),
-    new TypeInfo("Double", "double", false, true),
-    new TypeInfo("Binary", "Binary", true, true),
+    new TypeInfo("Integer", "int",  true),
+    new TypeInfo("Long", "long", true),
+    new TypeInfo("Boolean", "boolean", false),
+    new TypeInfo("Float", "float", true),
+    new TypeInfo("Double", "double", true),
+    new TypeInfo("Binary", "Binary", true),
   };
 
   public void run() throws IOException {
     add("package org.apache.parquet.filter2.recordlevel;\n" +
         "\n" +
+        "import java.util.List;\n" +
+        "\n" +
         "import org.apache.parquet.hadoop.metadata.ColumnPath;\n" +
         "import org.apache.parquet.filter2.predicate.Operators.Eq;\n" +
         "import org.apache.parquet.filter2.predicate.Operators.Gt;\n" +
@@ -79,7 +79,9 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
         "import org.apache.parquet.filter2.predicate.Operators.UserDefined;\n" +
         "import org.apache.parquet.filter2.predicate.UserDefinedPredicate;\n" +
         "import org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector;\n" +
-        "import org.apache.parquet.io.api.Binary;\n\n" +
+        "import org.apache.parquet.io.api.Binary;\n" +
+        "import org.apache.parquet.io.PrimitiveColumnIO;\n" +
+        "import org.apache.parquet.schema.PrimitiveComparator;\n\n" +
         "/**\n" +
         " * This class is auto-generated by {@link parquet.filter2.IncrementallyUpdatedFilterPredicateGenerator}\n" +
         " * Do not manually edit!\n" +
@@ -88,6 +90,10 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
 
     add("public class IncrementallyUpdatedFilterPredicateBuilder extends IncrementallyUpdatedFilterPredicateBuilderBase {\n\n");
 
+    add("  public IncrementallyUpdatedFilterPredicateBuilder(List<PrimitiveColumnIO> leaves) {\n" +
+      "    super(leaves);\n" +
+      "  }\n\n");
+
     addVisitBegin("Eq");
     for (TypeInfo info : TYPES) {
       addEqNotEqCase(info, true);
@@ -180,6 +186,7 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
         "        };\n" +
         "      } else {\n" +
         "        final " + info.primitiveName + " target = (" + info.className + ") (Object) pred.getValue();\n" +
+        "        final PrimitiveComparator<" + info.className + "> comparator = getComparator(columnPath);\n" +
         "\n" +
         "        valueInspector = new ValueInspector() {\n" +
         "          @Override\n" +
@@ -190,11 +197,7 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
         "          @Override\n" +
         "          public void update(" + info.primitiveName + " value) {\n");
 
-    if (info.useComparable) {
-      add("            setResult(" + compareEquality("value", "target", isEq) + ");\n");
-    } else {
-      add("            setResult(" + (isEq ? "value == target" : "value != target" )  + ");\n");
-    }
+    add("            setResult(" + compareEquality("value", "target", isEq) + ");\n");
 
     add("          }\n" +
         "        };\n" +
@@ -212,6 +215,7 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
 
     add("    if (clazz.equals(" + info.className + ".class)) {\n" +
         "      final " + info.primitiveName + " target = (" + info.className + ") (Object) pred.getValue();\n" +
+        "      final PrimitiveComparator<" + info.className + "> comparator = getComparator(columnPath);\n" +
         "\n" +
         "      valueInspector = new ValueInspector() {\n" +
         "        @Override\n" +
@@ -222,11 +226,8 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
         "        @Override\n" +
         "        public void update(" + info.primitiveName + " value) {\n");
 
-    if (info.useComparable) {
-      add("          setResult(value.compareTo(target) " + op + " 0);\n");
-    } else {
-      add("          setResult(value " + op + " target);\n");
-    }
+    add("          setResult(comparator.compare(value, target) " + op + " 0);\n");
+
     add("        }\n" +
         "      };\n" +
         "    }\n\n");
@@ -260,7 +261,7 @@ public class IncrementallyUpdatedFilterPredicateGenerator {
   }
 
   private String compareEquality(String var, String target, boolean eq) {
-    return var + ".compareTo(" + target + ")" + (eq ? " == 0 " : " != 0");
+    return "comparator.compare(" + var + ", " + target + ")" + (eq ? " == 0 " : " != 0");
   }
 
   private void add(String s) throws IOException {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index 19604ec..eaba2c1 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -33,6 +33,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -75,7 +76,7 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
 
   @SuppressWarnings("unchecked")
   private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
-    ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getType(), -1, -1);
+    ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
     DictionaryPage page = dictionaries.readDictionaryPage(col);
 
     // the chunk may not be dictionary-encoded
@@ -212,8 +213,9 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
         return BLOCK_MIGHT_MATCH;
       }
 
+      Comparator<T> comparator = meta.getPrimitiveType().comparator();
       for (T entry : dictSet) {
-        if (value.compareTo(entry) > 0) {
+        if (comparator.compare(value, entry) > 0) {
           return BLOCK_MIGHT_MATCH;
         }
       }
@@ -253,8 +255,9 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
         return BLOCK_MIGHT_MATCH;
       }
 
+      Comparator<T> comparator = meta.getPrimitiveType().comparator();
       for (T entry : dictSet) {
-        if (value.compareTo(entry) >= 0) {
+        if (comparator.compare(value, entry) >= 0) {
           return BLOCK_MIGHT_MATCH;
         }
       }
@@ -292,8 +295,9 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
         return BLOCK_MIGHT_MATCH;
       }
 
+      Comparator<T> comparator = meta.getPrimitiveType().comparator();
       for (T entry : dictSet) {
-        if (value.compareTo(entry) < 0) {
+        if (comparator.compare(value, entry) < 0) {
           return BLOCK_MIGHT_MATCH;
         }
       }
@@ -333,8 +337,9 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
         return BLOCK_MIGHT_MATCH;
       }
 
+      Comparator<T> comparator = meta.getPrimitiveType().comparator();
       for (T entry : dictSet) {
-        if (value.compareTo(entry) <= 0) {
+        if (comparator.compare(value, entry) <= 0) {
           return BLOCK_MIGHT_MATCH;
         }
       }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
index ac7132e..f168a60 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
@@ -134,7 +134,7 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     }
 
     // drop if value < min || value > max
-    return value.compareTo(stats.genericGetMin()) < 0 || value.compareTo(stats.genericGetMax()) > 0;
+    return stats.compareMinToValue(value) > 0 || stats.compareMaxToValue(value) < 0;
   }
 
   @Override
@@ -173,7 +173,7 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     }
 
     // drop if this is a column where min = max = value
-    return value.compareTo(stats.genericGetMin()) == 0 && value.compareTo(stats.genericGetMax()) == 0;
+    return stats.compareMinToValue(value) == 0 && stats.compareMaxToValue(value) == 0;
   }
 
   @Override
@@ -204,7 +204,7 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     T value = lt.getValue();
 
     // drop if value <= min
-    return  value.compareTo(stats.genericGetMin()) <= 0;
+    return stats.compareMinToValue(value) >= 0;
   }
 
   @Override
@@ -235,7 +235,7 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     T value = ltEq.getValue();
 
     // drop if value < min
-    return value.compareTo(stats.genericGetMin()) < 0;
+    return stats.compareMinToValue(value) > 0;
   }
 
   @Override
@@ -266,7 +266,7 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     T value = gt.getValue();
 
     // drop if value >= max
-    return value.compareTo(stats.genericGetMax()) >= 0;
+    return stats.compareMaxToValue(value) <= 0;
   }
 
   @Override
@@ -296,8 +296,8 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
 
     T value = gtEq.getValue();
 
-    // drop if value >= max
-    return value.compareTo(stats.genericGetMax()) > 0;
+    // drop if value > max
+    return stats.compareMaxToValue(value) < 0;
   }
 
   @Override
@@ -356,7 +356,8 @@ public class StatisticsFilter implements FilterPredicate.Visitor<Boolean> {
     }
 
     org.apache.parquet.filter2.predicate.Statistics<T> udpStats =
-        new org.apache.parquet.filter2.predicate.Statistics<T>(stats.genericGetMin(), stats.genericGetMax());
+      new org.apache.parquet.filter2.predicate.Statistics<T>(stats.genericGetMin(), stats.genericGetMax(),
+        stats.comparator());
 
     if (inverted) {
       return udp.inverseCanDrop(udpStats);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 163056c..ef59760 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -44,6 +44,7 @@ import org.apache.parquet.format.PageEncodingStats;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.format.ColumnChunk;
 import org.apache.parquet.format.ColumnMetaData;
+import org.apache.parquet.format.ColumnOrder;
 import org.apache.parquet.format.ConvertedType;
 import org.apache.parquet.format.DataPageHeader;
 import org.apache.parquet.format.DataPageHeaderV2;
@@ -58,12 +59,14 @@ import org.apache.parquet.format.RowGroup;
 import org.apache.parquet.format.SchemaElement;
 import org.apache.parquet.format.Statistics;
 import org.apache.parquet.format.Type;
+import org.apache.parquet.format.TypeDefinedOrder;
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.column.EncodingStats;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
@@ -79,6 +82,7 @@ import org.slf4j.LoggerFactory;
 // TODO: Lets split it up: https://issues.apache.org/jira/browse/PARQUET-310
 public class ParquetMetadataConverter {
 
+  private static final TypeDefinedOrder TYPE_DEFINED_ORDER = new TypeDefinedOrder();
   public static final MetadataFilter NO_FILTER = new NoFilter();
   public static final MetadataFilter SKIP_ROW_GROUPS = new SkipMetadataFilter();
   public static final long MAX_STATS_SIZE = 4096; // limit stats to 4k
@@ -135,9 +139,24 @@ public class ParquetMetadataConverter {
     }
 
     fileMetaData.setCreated_by(parquetMetadata.getFileMetaData().getCreatedBy());
+
+    fileMetaData.setColumn_orders(getColumnOrders(parquetMetadata.getFileMetaData().getSchema()));
+
     return fileMetaData;
   }
 
+  private List<ColumnOrder> getColumnOrders(MessageType schema) {
+    List<ColumnOrder> columnOrders = new ArrayList<>();
+    // Currently, only TypeDefinedOrder is supported, so we create a column order for each column with
+    // TypeDefinedOrder even if some types (e.g. INT96) have undefined column orders.
+    for (int i = 0, n = schema.getPaths().size(); i < n; ++i) {
+      ColumnOrder columnOrder = new ColumnOrder();
+      columnOrder.setTYPE_ORDER(TYPE_DEFINED_ORDER);
+      columnOrders.add(columnOrder);
+    }
+    return columnOrders;
+  }
+
   // Visible for testing
   List<SchemaElement> toParquetSchema(MessageType schema) {
     List<SchemaElement> result = new ArrayList<SchemaElement>();
@@ -326,20 +345,37 @@ public class ParquetMetadataConverter {
   }
 
   public static Statistics toParquetStatistics(
-      org.apache.parquet.column.statistics.Statistics statistics) {
-    Statistics stats = new Statistics();
+      org.apache.parquet.column.statistics.Statistics stats) {
+    Statistics formatStats = new Statistics();
     // Don't write stats larger than the max size rather than truncating. The
     // rationale is that some engines may use the minimum value in the page as
     // the true minimum for aggregations and there is no way to mark that a
     // value has been truncated and is a lower bound and not in the page.
-    if (!statistics.isEmpty() && statistics.isSmallerThan(MAX_STATS_SIZE)) {
-      stats.setNull_count(statistics.getNumNulls());
-      if (statistics.hasNonNullValue()) {
-        stats.setMax(statistics.getMaxBytes());
-        stats.setMin(statistics.getMinBytes());
+    if (!stats.isEmpty() && stats.isSmallerThan(MAX_STATS_SIZE)) {
+      formatStats.setNull_count(stats.getNumNulls());
+      if (stats.hasNonNullValue()) {
+        byte[] min = stats.getMinBytes();
+        byte[] max = stats.getMaxBytes();
+
+        // Fill the former min-max statistics only if the comparison logic is
+        // signed so the logic of V1 and V2 stats is the same (which is
+        // trivially true for equal min-max values)
+        if (sortOrder(stats.type()) == SortOrder.SIGNED || Arrays.equals(min, max)) {
+          formatStats.setMin(min);
+          formatStats.setMax(max);
+        }
+
+        if (isMinMaxStatsSupported(stats.type()) || Arrays.equals(min, max)) {
+          formatStats.setMin_value(min);
+          formatStats.setMax_value(max);
+        }
       }
     }
-    return stats;
+    return formatStats;
+  }
+
+  private static boolean isMinMaxStatsSupported(PrimitiveType type) {
+    return type.columnOrder().getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER;
   }
 
   /**
@@ -357,29 +393,42 @@ public class ParquetMetadataConverter {
   @Deprecated
   public static org.apache.parquet.column.statistics.Statistics fromParquetStatistics
       (String createdBy, Statistics statistics, PrimitiveTypeName type) {
-    return fromParquetStatisticsInternal(createdBy, statistics, type, defaultSortOrder(type));
+    return fromParquetStatisticsInternal(createdBy, statistics,
+        new PrimitiveType(Repetition.OPTIONAL, type, "fake_type"), defaultSortOrder(type));
   }
 
   // Visible for testing
   static org.apache.parquet.column.statistics.Statistics fromParquetStatisticsInternal
-      (String createdBy, Statistics statistics, PrimitiveTypeName type, SortOrder typeSortOrder) {
+      (String createdBy, Statistics formatStats, PrimitiveType type, SortOrder typeSortOrder) {
     // create stats object based on the column type
-    org.apache.parquet.column.statistics.Statistics stats = org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType(type);
-    // If there was no statistics written to the footer, create an empty Statistics object and return
-
-    boolean isSet = statistics != null && statistics.isSetMax() && statistics.isSetMin();
-    boolean maxEqualsMin = isSet ? Arrays.equals(statistics.getMin(), statistics.getMax()) : false;
-    boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder;
-    // NOTE: See docs in CorruptStatistics for explanation of why this check is needed
-    // The sort order is checked to avoid returning min/max stats that are not
-    // valid with the type's sort order. Currently, all stats are aggregated
-    // using a signed ordering, which isn't valid for strings or unsigned ints.
-    if (statistics != null && !CorruptStatistics.shouldIgnoreStatistics(createdBy, type) &&
-        ( sortOrdersMatch || maxEqualsMin)) {
-      if (isSet) {
-        stats.setMinMaxFromBytes(statistics.min.array(), statistics.max.array());
+    org.apache.parquet.column.statistics.Statistics stats = org.apache.parquet.column.statistics.Statistics.createStats(type);
+
+    if (formatStats != null) {
+      // Use the new V2 min-max statistics over the former one if it is filled
+      if (formatStats.isSetMin_value() && formatStats.isSetMax_value()) {
+        byte[] min = formatStats.min_value.array();
+        byte[] max = formatStats.max_value.array();
+        if (isMinMaxStatsSupported(type) || Arrays.equals(min, max)) {
+          stats.setMinMaxFromBytes(min, max);
+        }
+        stats.setNumNulls(formatStats.null_count);
+      } else {
+        boolean isSet = formatStats.isSetMax() && formatStats.isSetMin();
+        boolean maxEqualsMin = isSet ? Arrays.equals(formatStats.getMin(), formatStats.getMax()) : false;
+        boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder;
+        // NOTE: See docs in CorruptStatistics for explanation of why this check is needed
+        // The sort order is checked to avoid returning min/max stats that are not
+        // valid with the type's sort order. In previous releases, all stats were
+        // aggregated using a signed byte-wise ordering, which isn't valid for all the
+        // types (e.g. strings, decimals etc.).
+        if (!CorruptStatistics.shouldIgnoreStatistics(createdBy, type.getPrimitiveTypeName()) &&
+            (sortOrdersMatch || maxEqualsMin)) {
+          if (isSet) {
+            stats.setMinMaxFromBytes(formatStats.min.array(), formatStats.max.array());
+          }
+          stats.setNumNulls(formatStats.null_count);
+        }
       }
-      stats.setNumNulls(statistics.null_count);
     }
     return stats;
   }
@@ -389,7 +438,7 @@ public class ParquetMetadataConverter {
     SortOrder expectedOrder = overrideSortOrderToSigned(type) ?
         SortOrder.SIGNED : sortOrder(type);
     return fromParquetStatisticsInternal(
-        createdBy, statistics, type.getPrimitiveTypeName(), expectedOrder);
+        createdBy, statistics, type, expectedOrder);
   }
 
   /**
@@ -827,7 +876,7 @@ public class ParquetMetadataConverter {
   }
 
   public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException {
-    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema());
+    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders());
     List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
     List<RowGroup> row_groups = parquetMetadata.getRow_groups();
     if (row_groups != null) {
@@ -846,7 +895,7 @@ public class ParquetMetadataConverter {
           ColumnPath path = getPath(metaData);
           ColumnChunkMetaData column = ColumnChunkMetaData.get(
               path,
-              messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
+              messageType.getType(path.toArray()).asPrimitiveType(),
               fromFormatCodec(metaData.codec),
               convertEncodingStats(metaData.getEncoding_stats()),
               fromFormatEncodings(metaData.encodings),
@@ -886,20 +935,22 @@ public class ParquetMetadataConverter {
   }
 
   // Visible for testing
-  MessageType fromParquetSchema(List<SchemaElement> schema) {
+  MessageType fromParquetSchema(List<SchemaElement> schema, List<ColumnOrder> columnOrders) {
     Iterator<SchemaElement> iterator = schema.iterator();
     SchemaElement root = iterator.next();
     Types.MessageTypeBuilder builder = Types.buildMessage();
     if (root.isSetField_id()) {
       builder.id(root.field_id);
     }
-    buildChildren(builder, iterator, root.getNum_children());
+    buildChildren(builder, iterator, root.getNum_children(), columnOrders, 0);
     return builder.named(root.name);
   }
 
   private void buildChildren(Types.GroupBuilder builder,
                              Iterator<SchemaElement> schema,
-                             int childrenCount) {
+                             int childrenCount,
+                             List<ColumnOrder> columnOrders,
+                             int columnCount) {
     for (int i = 0; i < childrenCount; i++) {
       SchemaElement schemaElement = schema.next();
 
@@ -918,11 +969,21 @@ public class ParquetMetadataConverter {
         if (schemaElement.isSetScale()) {
           primitiveBuilder.scale(schemaElement.scale);
         }
+        if (columnOrders != null) {
+          org.apache.parquet.schema.ColumnOrder columnOrder = fromParquetColumnOrder(columnOrders.get(columnCount));
+          // As per parquet-format 2.4.0, no UNDEFINED order is supported, so set an undefined column order
+          // for the types where ordering is not supported.
+          if (columnOrder.getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER
+              && (schemaElement.type == Type.INT96 || schemaElement.converted_type == ConvertedType.INTERVAL)) {
+            columnOrder = org.apache.parquet.schema.ColumnOrder.undefined();
+          }
+          primitiveBuilder.columnOrder(columnOrder);
+        }
         childBuilder = primitiveBuilder;
 
       } else {
         childBuilder = builder.group(fromParquetRepetition(schemaElement.repetition_type));
-        buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children);
+        buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children, columnOrders, columnCount);
       }
 
       if (schemaElement.isSetConverted_type()) {
@@ -933,6 +994,7 @@ public class ParquetMetadataConverter {
       }
 
       childBuilder.named(schemaElement.name);
+      ++columnCount;
     }
   }
 
@@ -946,6 +1008,14 @@ public class ParquetMetadataConverter {
     return Repetition.valueOf(repetition.name());
   }
 
+  private static org.apache.parquet.schema.ColumnOrder fromParquetColumnOrder(ColumnOrder columnOrder) {
+    if (columnOrder.isSetTYPE_ORDER()) {
+      return org.apache.parquet.schema.ColumnOrder.typeDefined();
+    }
+    // The column order is not yet supported by this API
+    return org.apache.parquet.schema.ColumnOrder.undefined();
+  }
+
   @Deprecated
   public void writeDataPageHeader(
       int uncompressedSize,
@@ -994,8 +1064,7 @@ public class ParquetMetadataConverter {
         getEncoding(dlEncoding),
         getEncoding(rlEncoding)));
     if (!statistics.isEmpty()) {
-      pageHeader.getData_page_header().setStatistics(
-          toParquetStatistics(statistics));
+      pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics));
     }
     return pageHeader;
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index ac3cd3b..82c288f 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -18,8 +18,6 @@
  */
 package org.apache.parquet.hadoop;
 
-import static org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType;
-
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -79,7 +77,6 @@ class ColumnChunkPageWriteStore implements PageWriteStore {
       this.compressor = compressor;
       this.allocator = allocator;
       this.buf = new ConcatenatingByteArrayCollector();
-      this.totalStatistics = getStatsBasedOnType(this.path.getType());
     }
 
     @Override
@@ -116,7 +113,14 @@ class ColumnChunkPageWriteStore implements PageWriteStore {
       this.compressedLength += compressedSize;
       this.totalValueCount += valueCount;
       this.pageCount += 1;
-      this.totalStatistics.mergeStatistics(statistics);
+
+      // Copy the statistics if not initialized yet so that we have the correctly typed one
+      if (totalStatistics == null) {
+        totalStatistics = statistics.copy();
+      } else {
+        totalStatistics.mergeStatistics(statistics);
+      }
+
       // by concatenating before collecting instead of collecting twice,
       // we only allocate one buffer to copy into instead of multiple.
       buf.collect(BytesInput.concat(BytesInput.from(tempOutputStream), compressedBytes));
@@ -154,7 +158,13 @@ class ColumnChunkPageWriteStore implements PageWriteStore {
       this.compressedLength += compressedSize;
       this.totalValueCount += valueCount;
       this.pageCount += 1;
-      this.totalStatistics.mergeStatistics(statistics);
+
+      // Copy the statistics if not initialized yet so that we have the correctly typed one
+      if (totalStatistics == null) {
+        totalStatistics = statistics.copy();
+      } else {
+        totalStatistics.mergeStatistics(statistics);
+      }
 
       // by concatenating before collecting instead of collecting twice,
       // we only allocate one buffer to copy into instead of multiple.

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index da8635d..285c2db 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -65,7 +65,7 @@ import org.apache.parquet.io.SeekableInputStream;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.io.PositionOutputStream;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.TypeUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -116,7 +116,7 @@ public class ParquetFileWriter {
   // column chunk data set at the start of a column
   private CompressionCodecName currentChunkCodec; // set in startColumn
   private ColumnPath currentChunkPath;            // set in startColumn
-  private PrimitiveTypeName currentChunkType;     // set in startColumn
+  private PrimitiveType currentChunkType;         // set in startColumn
   private long currentChunkValueCount;            // set in startColumn
   private long currentChunkFirstDataPage;         // set in startColumn (out.pos())
   private long currentChunkDictionaryPageOffset;  // set in writeDictionaryPage
@@ -317,15 +317,14 @@ public class ParquetFileWriter {
     encodingStatsBuilder.clear();
     currentEncodings = new HashSet<Encoding>();
     currentChunkPath = ColumnPath.get(descriptor.getPath());
-    currentChunkType = descriptor.getType();
+    currentChunkType = descriptor.getPrimitiveType();
     currentChunkCodec = compressionCodecName;
     currentChunkValueCount = valueCount;
     currentChunkFirstDataPage = out.getPos();
     compressedLength = 0;
     uncompressedLength = 0;
-    // need to know what type of stats to initialize to
-    // better way to do this?
-    currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
+    // The statistics will be copied from the first one added at writeDataPage(s) so we have the correct typed one
+    currentStatistics = null;
   }
 
   /**
@@ -425,7 +424,14 @@ public class ParquetFileWriter {
     this.compressedLength += compressedPageSize + headerSize;
     LOG.debug("{}: write data page content {}", out.getPos(), compressedPageSize);
     bytes.writeAllTo(out);
-    currentStatistics.mergeStatistics(statistics);
+
+    // Copy the statistics if not initialized yet so that we have the correctly typed one
+    if (currentStatistics == null) {
+      currentStatistics = statistics.copy();
+    } else {
+      currentStatistics.mergeStatistics(statistics);
+    }
+
     encodingStatsBuilder.addDataEncoding(valuesEncoding);
     currentEncodings.add(rlEncoding);
     currentEncodings.add(dlEncoding);
@@ -599,7 +605,7 @@ public class ParquetFileWriter {
 
       currentBlock.addColumn(ColumnChunkMetaData.get(
           chunk.getPath(),
-          chunk.getType(),
+          chunk.getPrimitiveType(),
           chunk.getCodec(),
           chunk.getEncodingStats(),
           chunk.getEncodings(),

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index 720bd77..e198698 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -24,7 +24,9 @@ import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.EncodingStats;
 import org.apache.parquet.column.statistics.BooleanStatistics;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
 
 /**
  * Column meta data for a block stored in the file footer and passed in the InputSplit
@@ -65,6 +67,12 @@ abstract public class ColumnChunkMetaData {
         valueCount, totalSize, totalUncompressedSize);
   }
 
+  /**
+   * @deprecated will be removed in 2.0.0. Use
+   *             {@link #get(ColumnPath, PrimitiveType, CompressionCodecName, EncodingStats, Set, Statistics, long, long, long, long, long)}
+   *             instead.
+   */
+  @Deprecated
   public static ColumnChunkMetaData get(
       ColumnPath path,
       PrimitiveTypeName type,
@@ -77,6 +85,22 @@ abstract public class ColumnChunkMetaData {
       long valueCount,
       long totalSize,
       long totalUncompressedSize) {
+    return get(path, Types.optional(type).named("fake_type"), codec, encodingStats, encodings, statistics,
+        firstDataPage, dictionaryPageOffset, valueCount, totalSize, totalUncompressedSize);
+  }
+
+  public static ColumnChunkMetaData get(
+      ColumnPath path,
+      PrimitiveType type,
+      CompressionCodecName codec,
+      EncodingStats encodingStats,
+      Set<Encoding> encodings,
+      Statistics statistics,
+      long firstDataPage,
+      long dictionaryPageOffset,
+      long valueCount,
+      long totalSize,
+      long totalUncompressedSize) {
     // to save space we store those always positive longs in ints when they fit.
     if (positiveLongFitsInAnInt(firstDataPage)
         && positiveLongFitsInAnInt(dictionaryPageOffset)
@@ -149,19 +173,30 @@ abstract public class ColumnChunkMetaData {
   /**
    *
    * @return column identifier
+   * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
    */
+  @Deprecated
   public ColumnPath getPath() {
     return properties.getPath();
   }
 
   /**
    * @return type of the column
+   * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
    */
+  @Deprecated
   public PrimitiveTypeName getType() {
     return properties.getType();
   }
 
   /**
+   * @return the primitive type object of the column
+   */
+  public PrimitiveType getPrimitiveType() {
+    return properties.getPrimitiveType();
+  }
+
+  /**
    * @return start of the column data offset
    */
   abstract public long getFirstDataPageOffset();
@@ -231,7 +266,7 @@ class IntColumnChunkMetaData extends ColumnChunkMetaData {
    */
   IntColumnChunkMetaData(
       ColumnPath path,
-      PrimitiveTypeName type,
+      PrimitiveType type,
       CompressionCodecName codec,
       EncodingStats encodingStats,
       Set<Encoding> encodings,
@@ -336,7 +371,7 @@ class LongColumnChunkMetaData extends ColumnChunkMetaData {
    */
   LongColumnChunkMetaData(
       ColumnPath path,
-      PrimitiveTypeName type,
+      PrimitiveType type,
       CompressionCodecName codec,
       EncodingStats encodingStats,
       Set<Encoding> encodings,

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
index 5e26675..233cf94 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
@@ -22,24 +22,36 @@ import java.util.Arrays;
 import java.util.Set;
 
 import org.apache.parquet.column.Encoding;
+import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type;
 
 public class ColumnChunkProperties {
 
   private static Canonicalizer<ColumnChunkProperties> properties = new Canonicalizer<ColumnChunkProperties>();
 
+  /**
+   * @deprecated will be removed in 2.0.0. Use {@link #get(ColumnPath, PrimitiveType, CompressionCodecName, Set)}
+   *             instead.
+   */
+  @Deprecated
   public static ColumnChunkProperties get(ColumnPath path, PrimitiveTypeName type, CompressionCodecName codec, Set<Encoding> encodings) {
+    return get(path, new PrimitiveType(Type.Repetition.OPTIONAL, type, ""), codec, encodings);
+  }
+
+  public static ColumnChunkProperties get(ColumnPath path, PrimitiveType type, CompressionCodecName codec,
+      Set<Encoding> encodings) {
     return properties.canonicalize(new ColumnChunkProperties(codec, path, type, encodings));
   }
 
   private final CompressionCodecName codec;
   private final ColumnPath path;
-  private final PrimitiveTypeName type;
+  private final PrimitiveType type;
   private final Set<Encoding> encodings;
 
   private ColumnChunkProperties(CompressionCodecName codec,
                                 ColumnPath path,
-                                PrimitiveTypeName type,
+                                PrimitiveType type,
                                 Set<Encoding> encodings) {
     super();
     this.codec = codec;
@@ -56,7 +68,19 @@ public class ColumnChunkProperties {
     return path;
   }
 
+  /**
+   * @return the primitive type name for the column
+   * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
+   */
+  @Deprecated
   public PrimitiveTypeName getType() {
+    return type.getPrimitiveTypeName();
+  }
+
+  /**
+   * @return the primitive type object for the column
+   */
+  public PrimitiveType getPrimitiveType() {
     return type;
   }
 
@@ -68,7 +92,7 @@ public class ColumnChunkProperties {
   public boolean equals(Object obj) {
     if (obj instanceof ColumnChunkProperties) {
       ColumnChunkProperties other = (ColumnChunkProperties)obj;
-      return other.codec == codec && other.path.equals(path) && other.type == type && equals(other.encodings, encodings);
+      return other.codec == codec && other.path.equals(path) && other.type.equals(type) && equals(other.encodings, encodings);
     }
     return false;
   }


Mime
View raw message