parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [1/3] parquet-mr git commit: PARQUET-1025: Support new min-max statistics in parquet-mr
Date Sat, 13 Jan 2018 00:29:53 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 4d996d1ba -> c6764c4a0


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 4df45dd..ee92d46 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -22,7 +22,9 @@ import static java.util.Collections.emptyList;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.apache.parquet.format.CompressionCodec.UNCOMPRESSED;
 import static org.apache.parquet.format.Type.INT32;
@@ -34,6 +36,8 @@ import static org.apache.parquet.format.converter.ParquetMetadataConverter.getOf
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -48,6 +52,7 @@ import com.google.common.collect.Sets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.parquet.Version;
 import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.statistics.BinaryStatistics;
 import org.apache.parquet.column.statistics.BooleanStatistics;
 import org.apache.parquet.column.statistics.DoubleStatistics;
@@ -61,9 +66,9 @@ import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
 import org.junit.Assert;
 import org.junit.Test;
-
 import org.apache.parquet.example.Paper;
 import org.apache.parquet.format.ColumnChunk;
 import org.apache.parquet.format.ColumnMetaData;
@@ -75,6 +80,7 @@ import org.apache.parquet.format.PageType;
 import org.apache.parquet.format.RowGroup;
 import org.apache.parquet.format.SchemaElement;
 import org.apache.parquet.format.Type;
+import org.apache.parquet.schema.ColumnOrder;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -101,7 +107,7 @@ public class TestParquetMetadataConverter {
   public void testSchemaConverter() {
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
     List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(Paper.schema);
-    MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema);
+    MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
     assertEquals(Paper.schema, schema);
   }
 
@@ -370,7 +376,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testBinaryStats() {
+  public void testBinaryStatsV1() {
+    testBinaryStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testBinaryStatsV2() {
+    testBinaryStats(StatsHelper.V2);
+  }
+
+  private void testBinaryStats(StatsHelper helper) {
     // make fake stats and verify the size check
     BinaryStatistics stats = new BinaryStatistics();
     stats.incrementNumNulls(3004);
@@ -384,33 +399,47 @@ public class TestParquetMetadataConverter {
     Assert.assertTrue("Should be smaller than min + max size + 1",
         stats.isSmallerThan(totalLen + 1));
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
-    Assert.assertArrayEquals("Min should match", min, formatStats.getMin());
-    Assert.assertArrayEquals("Max should match", max, formatStats.getMax());
+    assertFalse("Min should not be set", formatStats.isSetMin());
+    assertFalse("Max should not be set", formatStats.isSetMax());
+    if (helper == StatsHelper.V2) {
+      Assert.assertArrayEquals("Min_value should match", min, formatStats.getMin_value());
+      Assert.assertArrayEquals("Max_value should match", max, formatStats.getMax_value());
+    }
     Assert.assertEquals("Num nulls should match",
         3004, formatStats.getNull_count());
 
     // convert to empty stats because the values are too large
     stats.setMinMaxFromBytes(max, max);
 
-    formatStats = ParquetMetadataConverter.toParquetStatistics(stats);
+    formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertFalse("Min should not be set", formatStats.isSetMin());
     Assert.assertFalse("Max should not be set", formatStats.isSetMax());
+    Assert.assertFalse("Min_value should not be set", formatStats.isSetMin_value());
+    Assert.assertFalse("Max_value should not be set", formatStats.isSetMax_value());
     Assert.assertFalse("Num nulls should not be set",
         formatStats.isSetNull_count());
 
     Statistics roundTripStats = ParquetMetadataConverter.fromParquetStatisticsInternal(
-        Version.FULL_VERSION, formatStats, PrimitiveTypeName.BINARY,
+        Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, ""),
         ParquetMetadataConverter.SortOrder.SIGNED);
 
     Assert.assertTrue(roundTripStats.isEmpty());
   }
 
   @Test
-  public void testIntegerStats() {
+  public void testIntegerStatsV1() {
+    testIntegerStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testIntegerStatsV2() {
+    testIntegerStats(StatsHelper.V2);
+  }
+
+  private void testIntegerStats(StatsHelper helper) {
     // make fake stats and verify the size check
     IntStatistics stats = new IntStatistics();
     stats.incrementNumNulls(3004);
@@ -419,8 +448,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(min);
     stats.updateStats(max);
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertEquals("Min should match",
         min, BytesUtils.bytesToInt(formatStats.getMin()));
@@ -431,7 +459,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testLongStats() {
+  public void testLongStatsV1() {
+    testLongStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testLongStatsV2() {
+    testLongStats(StatsHelper.V2);
+  }
+
+  private void testLongStats(StatsHelper helper) {
     // make fake stats and verify the size check
     LongStatistics stats = new LongStatistics();
     stats.incrementNumNulls(3004);
@@ -440,8 +477,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(min);
     stats.updateStats(max);
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertEquals("Min should match",
         min, BytesUtils.bytesToLong(formatStats.getMin()));
@@ -452,7 +488,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testFloatStats() {
+  public void testFloatStatsV1() {
+    testFloatStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testFloatStatsV2() {
+    testFloatStats(StatsHelper.V2);
+  }
+
+  private void testFloatStats(StatsHelper helper) {
     // make fake stats and verify the size check
     FloatStatistics stats = new FloatStatistics();
     stats.incrementNumNulls(3004);
@@ -461,8 +506,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(min);
     stats.updateStats(max);
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertEquals("Min should match",
         min, Float.intBitsToFloat(BytesUtils.bytesToInt(formatStats.getMin())),
@@ -475,7 +519,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testDoubleStats() {
+  public void testDoubleStatsV1() {
+    testDoubleStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testDoubleStatsV2() {
+    testDoubleStats(StatsHelper.V2);
+  }
+
+  private void testDoubleStats(StatsHelper helper) {
     // make fake stats and verify the size check
     DoubleStatistics stats = new DoubleStatistics();
     stats.incrementNumNulls(3004);
@@ -484,8 +537,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(min);
     stats.updateStats(max);
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertEquals("Min should match",
         min, Double.longBitsToDouble(BytesUtils.bytesToLong(formatStats.getMin())),
@@ -498,7 +550,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testBooleanStats() {
+  public void testBooleanStatsV1() {
+    testBooleanStats(StatsHelper.V1);
+  }
+
+  @Test
+  public void testBooleanStatsV2() {
+    testBooleanStats(StatsHelper.V2);
+  }
+
+  private void testBooleanStats(StatsHelper helper) {
     // make fake stats and verify the size check
     BooleanStatistics stats = new BooleanStatistics();
     stats.incrementNumNulls(3004);
@@ -507,8 +568,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(min);
     stats.updateStats(max);
 
-    org.apache.parquet.format.Statistics formatStats =
-        ParquetMetadataConverter.toParquetStatistics(stats);
+    org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);
 
     Assert.assertEquals("Min should match",
         min, BytesUtils.bytesToBool(formatStats.getMin()));
@@ -528,17 +588,27 @@ public class TestParquetMetadataConverter {
     stats.updateStats(Binary.fromString("z"));
     stats.incrementNumNulls();
 
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
+        .as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
-        ParquetMetadataConverter.toParquetStatistics(stats),
-        Types.required(PrimitiveTypeName.BINARY)
-            .as(OriginalType.UTF8).named("b"));
+        StatsHelper.V1.toParquetStatistics(stats),
+        binaryType);
 
     Assert.assertTrue("Stats should be empty: " + convertedStats, convertedStats.isEmpty());
   }
 
   @Test
-  public void testStillUseStatsWithSignedSortOrderIfSingleValue() {
+  public void testStillUseStatsWithSignedSortOrderIfSingleValueV1() {
+    testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper.V1);
+  }
+
+  @Test
+  public void testStillUseStatsWithSignedSortOrderIfSingleValueV2() {
+    testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper.V2);
+  }
+
+  private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
     ParquetMetadataConverter converter = new ParquetMetadataConverter();
     BinaryStatistics stats = new BinaryStatistics();
     stats.incrementNumNulls();
@@ -547,18 +617,27 @@ public class TestParquetMetadataConverter {
     stats.updateStats(Binary.fromString("A"));
     stats.incrementNumNulls();
 
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
         ParquetMetadataConverter.toParquetStatistics(stats),
-        Types.required(PrimitiveTypeName.BINARY)
-            .as(OriginalType.UTF8).named("b"));
+        binaryType);
 
     Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
     Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
   }
 
   @Test
-  public void testUseStatsWithSignedSortOrder() {
+  public void testUseStatsWithSignedSortOrderV1() {
+    testUseStatsWithSignedSortOrder(StatsHelper.V1);
+  }
+
+  @Test
+  public void testUseStatsWithSignedSortOrderV2() {
+    testUseStatsWithSignedSortOrder(StatsHelper.V2);
+  }
+
+  private void testUseStatsWithSignedSortOrder(StatsHelper helper) {
     // override defaults and use stats that were accumulated using signed order
     Configuration conf = new Configuration();
     conf.setBoolean("parquet.strings.signed-min-max.enabled", true);
@@ -571,17 +650,213 @@ public class TestParquetMetadataConverter {
     stats.updateStats(Binary.fromString("z"));
     stats.incrementNumNulls();
 
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
+        .as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
-        ParquetMetadataConverter.toParquetStatistics(stats),
-        Types.required(PrimitiveTypeName.BINARY)
-            .as(OriginalType.UTF8).named("b"));
+        helper.toParquetStatistics(stats),
+        binaryType);
 
     Assert.assertFalse("Stats should not be empty", convertedStats.isEmpty());
     Assert.assertEquals("Should have 3 nulls", 3, convertedStats.getNumNulls());
-    Assert.assertEquals("Should have correct min (unsigned sort)",
-        Binary.fromString("A"), convertedStats.genericGetMin());
-    Assert.assertEquals("Should have correct max (unsigned sort)",
-        Binary.fromString("z"), convertedStats.genericGetMax());
+    if (helper == StatsHelper.V1) {
+      assertFalse("Min-max should be null for V1 stats", convertedStats.hasNonNullValue());
+    } else {
+      Assert.assertEquals("Should have correct min (unsigned sort)",
+          Binary.fromString("A"), convertedStats.genericGetMin());
+      Assert.assertEquals("Should have correct max (unsigned sort)",
+          Binary.fromString("z"), convertedStats.genericGetMax());
+    }
+  }
+
+  @Test
+  public void testSkippedV2Stats() { // runs the helper overload below for an FLBA(12)/INTERVAL type and a plain INT96 type
+    testSkippedV2Stats(
+        Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named(""),
+        new BigInteger("12345678"),
+        new BigInteger("12345679"));
+    testSkippedV2Stats(Types.optional(PrimitiveTypeName.INT96).named(""),
+        new BigInteger("-75687987"),
+        new BigInteger("45367657"));
+  }
+
+  private void testSkippedV2Stats(PrimitiveType type, Object min, Object max) { // expects the converted stats to carry no min/max at all
+    Statistics<?> stats = createStats(type, min, max);
+    org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats);
+    assertFalse(statistics.isSetMin()); // min/max fields
+    assertFalse(statistics.isSetMax());
+    assertFalse(statistics.isSetMin_value()); // min_value/max_value fields
+    assertFalse(statistics.isSetMax_value());
+  }
+
+  @Test
+  public void testV2OnlyStats() { // each type plus min/max pair is handed to the helper overload below
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""),
+        0x7F,
+        0x80);
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""),
+        0x7FFF,
+        0x8000);
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""),
+        0x7FFFFFFF,
+        0x80000000); // as a Java int literal this is Integer.MIN_VALUE, yet it is passed as the max
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
+        0x7FFFFFFFFFFFFFFFL,
+        0x8000000000000000L); // likewise Long.MIN_VALUE as a long literal
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
+        new BigInteger("-765875"),
+        new BigInteger("876856"));
+    testV2OnlyStats(
+        Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
+            .named(""),
+        new BigInteger("-6769643"),
+        new BigInteger("9864675"));
+  }
+
+  private void testV2OnlyStats(PrimitiveType type, Object min, Object max) { // expects only min_value/max_value to be written
+    Statistics<?> stats = createStats(type, min, max);
+    org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats);
+    assertFalse(statistics.isSetMin()); // min/max must stay unset
+    assertFalse(statistics.isSetMax());
+    assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min_value); // compared as ByteBuffers against the stats' own bytes
+    assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max_value);
+  }
+
+  @Test
+  public void testV2StatsEqualMinMax() { // every call passes the same value as both min and max
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""),
+        93,
+        93);
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""),
+        -5892,
+        -5892);
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""),
+        234998934,
+        234998934);
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
+        -2389943895984985L,
+        -2389943895984985L);
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
+        new BigInteger("823749"),
+        new BigInteger("823749"));
+    testV2StatsEqualMinMax(
+        Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
+            .named(""),
+        new BigInteger("-8752832"),
+        new BigInteger("-8752832"));
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT96).named(""),
+        new BigInteger("81032984"),
+        new BigInteger("81032984"));
+  }
+
+  private void testV2StatsEqualMinMax(PrimitiveType type, Object min, Object max) { // expects all four fields to be written
+    Statistics<?> stats = createStats(type, min, max);
+    org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats);
+    assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min); // min/max fields
+    assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max);
+    assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min_value); // min_value/max_value fields
+    assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max_value);
+  }
+
+  private static <T> Statistics<?> createStats(PrimitiveType type, T min, T max) { // picks a createStatsTyped overload for min/max
+    Class<?> c = min.getClass(); // dispatch is on the runtime class of min only; max is cast to the same type
+    if (c == Integer.class) {
+      return createStatsTyped(type, (Integer) min, (Integer) max);
+    } else if (c == Long.class) {
+      return createStatsTyped(type, (Long) min, (Long) max);
+    } else if (c == BigInteger.class) {
+      return createStatsTyped(type, (BigInteger) min, (BigInteger) max);
+    }
+    fail("Not implemented"); // any other value class fails the calling test
+    return null; // only reached if fail() returns; keeps the compiler satisfied
+  }
+
+  private static Statistics<?> createStatsTyped(PrimitiveType type, int min, int max) { // int flavour: builds stats from two values
+    Statistics<?> stats = Statistics.createStats(type);
+    stats.updateStats(max); // max is recorded before min
+    stats.updateStats(min);
+    assertEquals(min, stats.genericGetMin()); // sanity check: the stats must report the values in the roles given here
+    assertEquals(max, stats.genericGetMax());
+    return stats;
+  }
+
+  private static Statistics<?> createStatsTyped(PrimitiveType type, long min, long max) { // long flavour: builds stats from two values
+    Statistics<?> stats = Statistics.createStats(type);
+    stats.updateStats(max); // max is recorded before min
+    stats.updateStats(min);
+    assertEquals(min, stats.genericGetMin()); // sanity check: the stats must report the values in the roles given here
+    assertEquals(max, stats.genericGetMax());
+    return stats;
+  }
+
+  private static Statistics<?> createStatsTyped(PrimitiveType type, BigInteger min, BigInteger max) { // binary flavour
+    Statistics<?> stats = Statistics.createStats(type);
+    Binary minBinary = Binary.fromConstantByteArray(min.toByteArray()); // toByteArray() yields big-endian two's-complement bytes
+    Binary maxBinary = Binary.fromConstantByteArray(max.toByteArray());
+    stats.updateStats(maxBinary); // max is recorded before min
+    stats.updateStats(minBinary);
+    assertEquals(minBinary, stats.genericGetMin()); // sanity check: the stats must report the values in the roles given here
+    assertEquals(maxBinary, stats.genericGetMax());
+    return stats;
+  }
+
+  private enum StatsHelper { // selects which flavour of format-level statistics a test produces
+    // Only min and max are filled (min_value and max_value are not)
+    V1() {
+      @Override
+      public org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats) {
+        org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats);
+        statistics.unsetMin_value(); // drop the V2 fields from the regular conversion result
+        statistics.unsetMax_value();
+        return statistics;
+      }
+    },
+    // min_value and max_value are filled (min and max might be filled as well)
+    V2() {
+      @Override
+      public org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats) {
+        return ParquetMetadataConverter.toParquetStatistics(stats); // regular conversion, returned untouched
+      }
+    };
+    public abstract org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats); // in-memory stats -> format stats
+  }
+
+  @Test
+  public void testColumnOrders() throws IOException { // round-trips a schema and checks each leaf column's column order
+    MessageType schema = parseMessageType("message test {"
+        + "  optional binary binary_col;"               // Normal column with type defined column order -> typeDefined
+        + "  optional group map_col (MAP) {"
+        + "    repeated group map (MAP_KEY_VALUE) {"
+        + "        required binary key (UTF8);"         // Key to be hacked to have unknown column order -> undefined
+        + "        optional group list_col (LIST) {"
+        + "          repeated group list {"
+        + "            optional int96 array_element;"   // INT96 element with type defined column order -> undefined
+        + "          }"
+        + "        }"
+        + "    }"
+        + "  }"
+        + "}");
+    org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData = new org.apache.parquet.hadoop.metadata.FileMetaData(
+        schema, new HashMap<String, String>(), null);
+    ParquetMetadata metadata = new ParquetMetadata(fileMetaData, new ArrayList<BlockMetaData>());
+    ParquetMetadataConverter converter = new ParquetMetadataConverter();
+    FileMetaData formatMetadata = converter.toParquetMetadata(1, metadata); // first argument is presumably the format version - TODO confirm
+
+    List<org.apache.parquet.format.ColumnOrder> columnOrders = formatMetadata.getColumn_orders();
+    assertEquals(3, columnOrders.size()); // the schema above has three leaf columns: binary_col, key, array_element
+    for (org.apache.parquet.format.ColumnOrder columnOrder : columnOrders) {
+      assertTrue(columnOrder.isSetTYPE_ORDER()); // on the write side every entry is TYPE_ORDER
+    }
+
+    // Simulate that thrift got a union type that is not in the generated code
+    // (when the file contains a not-yet-supported column order)
+    columnOrders.get(1).clear(); // index 1 is the map key, per the schema comments above
+
+    MessageType resultSchema = converter.fromParquetMetadata(formatMetadata).getFileMetaData().getSchema();
+    List<ColumnDescriptor> columns = resultSchema.getColumns();
+    assertEquals(3, columns.size());
+    assertEquals(ColumnOrder.typeDefined(), columns.get(0).getPrimitiveType().columnOrder()); // binary_col
+    assertEquals(ColumnOrder.undefined(), columns.get(1).getPrimitiveType().columnOrder()); // key, whose order was cleared above
+    assertEquals(ColumnOrder.undefined(), columns.get(2).getPrimitiveType().columnOrder()); // int96 array_element
   }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
index 6915c86..4243e9b 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
@@ -524,12 +524,12 @@ public class TestParquetFileWriter {
       String str = new String(bsout.getMaxBytes());
       String str2 = new String(bsout.getMinBytes());
 
-      assertTrue(((BinaryStatistics)readFooter.getBlocks().get(0).getColumns().get(0).getStatistics()).equals(bs1));
-      assertTrue(((LongStatistics)readFooter.getBlocks().get(0).getColumns().get(1).getStatistics()).equals(ls1));
+      TestUtils.assertStatsValuesEqual(bs1, readFooter.getBlocks().get(0).getColumns().get(0).getStatistics());
+      TestUtils.assertStatsValuesEqual(ls1, readFooter.getBlocks().get(0).getColumns().get(1).getStatistics());
     }
     { // assert stats are correct for the second block
-      assertTrue(((BinaryStatistics)readFooter.getBlocks().get(1).getColumns().get(0).getStatistics()).equals(bs2));
-      assertTrue(((LongStatistics)readFooter.getBlocks().get(1).getColumns().get(1).getStatistics()).equals(ls2));
+      TestUtils.assertStatsValuesEqual(bs2, readFooter.getBlocks().get(1).getColumns().get(0).getStatistics());
+      TestUtils.assertStatsValuesEqual(ls2, readFooter.getBlocks().get(1).getColumns().get(1).getStatistics());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java
index e53ac78..59b4b62 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java
@@ -24,6 +24,8 @@ import java.util.concurrent.Callable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.statistics.Statistics;
+import org.hamcrest.CoreMatchers;
 import org.junit.Assert;
 
 public class TestUtils {
@@ -61,4 +63,23 @@ public class TestUtils {
       }
     }
   }
+
+  public static void assertStatsValuesEqual(Statistics<?> stats1, Statistics<?> stats2) { // stats1 is passed on as expected, stats2 as actual
+    assertStatsValuesEqual(null, stats1, stats2); // delegates to the three-argument overload with a null message
+  }
+
+  // Asserts that two Statistics objects carry equal values (min bytes, max bytes, number of nulls). Useful when
+  // building a Statistics object for the exact Type would need too much work or code duplication.
+  public static void assertStatsValuesEqual(String message, Statistics<?> expected, Statistics<?> actual) {
+    if (expected == actual) {
+      return; // same instance, or both null
+    }
+    if (expected == null || actual == null) {
+      Assert.assertEquals(message, expected, actual); // exactly one side is null: always fails, now with the caller's message
+    }
+    Assert.assertThat(actual, CoreMatchers.instanceOf(expected.getClass())); // actual must be of expected's class (or a subclass)
+    Assert.assertArrayEquals(message, expected.getMaxBytes(), actual.getMaxBytes());
+    Assert.assertArrayEquals(message, expected.getMinBytes(), actual.getMinBytes());
+    Assert.assertEquals(message, expected.getNumNulls(), actual.getNumNulls());
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java
index cbdd935..16db5cb 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java
@@ -26,7 +26,7 @@ import java.util.Random;
 public class RandomValues {
   private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890";
 
-  private static abstract class RandomValueGenerator<T extends Comparable<T>> {
+  static abstract class RandomValueGenerator<T extends Comparable<T>> {
     private final Random random;
 
     protected RandomValueGenerator(long seed) {
@@ -37,8 +37,8 @@ public class RandomValues {
       return (random.nextInt(10) == 0);
     }
 
-    public int randomInt() { return randomInt(Integer.MAX_VALUE - 1); }
-    public int randomInt(int maximum) {
+    public int randomInt() { return random.nextInt(); }
+    public int randomPositiveInt(int maximum) {
       // Maximum may be a random number (which may be negative).
       return random.nextInt(Math.abs(maximum) + 1);
     }
@@ -63,11 +63,11 @@ public class RandomValues {
     }
 
     public char randomLetter() {
-      return ALPHABET.charAt(randomInt() % ALPHABET.length());
+      return ALPHABET.charAt(randomPositiveInt(ALPHABET.length() - 1));
     }
 
     public String randomString(int maxLength) {
-      return randomFixedLengthString(randomInt(maxLength));
+      return randomFixedLengthString(randomPositiveInt(maxLength));
     }
 
     public String randomFixedLengthString(int length) {
@@ -82,7 +82,7 @@ public class RandomValues {
     public abstract T nextValue();
   }
 
-  private static abstract class RandomBinaryBase<T extends Comparable<T>> extends RandomValueGenerator<T> {
+  static abstract class RandomBinaryBase<T extends Comparable<T>> extends RandomValueGenerator<T> {
     protected final int bufferLength;
     protected final byte[] buffer;
 
@@ -103,18 +103,56 @@ public class RandomValues {
   }
 
   public static class IntGenerator extends RandomValueGenerator<Integer> {
-    private final RandomRange<Integer> randomRange = new RandomRange<Integer>(randomInt(), randomInt());
-    private final int minimum = randomRange.minimum();
-    private final int maximum = randomRange.maximum();
-    private final int range = (maximum - minimum);
+    private final int minimum;
+    private final int range;
 
     public IntGenerator(long seed) {
       super(seed);
+      RandomRange<Integer> randomRange = new RandomRange<>(randomInt(), randomInt());
+      this.minimum = randomRange.minimum();
+      this.range = (randomRange.maximum() - this.minimum);
+    }
+
+    public IntGenerator(long seed, int minimum, int maximum) {
+      super(seed);
+      RandomRange<Integer> randomRange = new RandomRange<>(minimum, maximum);
+      this.minimum = randomRange.minimum();
+      this.range = randomRange.maximum() - this.minimum;
     }
 
     @Override
     public Integer nextValue() {
-      return (minimum + randomInt(range));
+      return (minimum + randomPositiveInt(range));
+    }
+  }
+
+  public static class UIntGenerator extends IntGenerator { // IntGenerator whose values are masked to 8 or 16 bits
+    private final int mask; // bit mask applied to every generated value
+
+    public UIntGenerator(long seed, byte minimum, byte maximum) {
+      super(seed, minimum, maximum);
+      mask = 0xFF; // byte bounds: keep the low 8 bits
+    }
+
+    public UIntGenerator(long seed, short minimum, short maximum) {
+      super(seed, minimum, maximum);
+      mask = 0xFFFF; // short bounds: keep the low 16 bits
+    }
+
+    @Override
+    public Integer nextValue() {
+      return super.nextValue() & mask; // masking makes the result non-negative
+    }
+  }
+
+  public static class UnconstrainedIntGenerator extends RandomValueGenerator<Integer> { // no min/max range is applied
+    public UnconstrainedIntGenerator(long seed) {
+      super(seed);
+    }
+
+    @Override
+    public Integer nextValue() {
+      return randomInt(); // returns the inherited randomInt() result as is
     }
   }
 
@@ -134,6 +172,17 @@ public class RandomValues {
     }
   }
 
+  public static class UnconstrainedLongGenerator extends RandomValueGenerator<Long> { // no min/max range is applied
+    public UnconstrainedLongGenerator(long seed) {
+      super(seed);
+    }
+
+    @Override
+    public Long nextValue() {
+      return randomLong(); // returns the inherited randomLong() result as is
+    }
+  }
+
   public static class Int96Generator extends RandomBinaryBase<BigInteger> {
     private final RandomRange<BigInteger> randomRange = new RandomRange<BigInteger>(randomInt96(), randomInt96());
     private final BigInteger minimum = randomRange.minimum();
@@ -173,6 +222,17 @@ public class RandomValues {
     }
   }
 
+  public static class UnconstrainedFloatGenerator extends RandomValueGenerator<Float> {
+    public UnconstrainedFloatGenerator(long seed) {
+      super(seed);
+    }
+
+    @Override
+    public Float nextValue() {
+      return randomFloat();
+    }
+  }
+
   public static class DoubleGenerator extends RandomValueGenerator<Double> {
     private final RandomRange<Double> randomRange = new RandomRange<Double>(randomDouble(), randomDouble());
     private final double minimum = randomRange.minimum();
@@ -189,6 +249,17 @@ public class RandomValues {
     }
   }
 
+  public static class UnconstrainedDoubleGenerator extends RandomValueGenerator<Double> {
+    public UnconstrainedDoubleGenerator(long seed) {
+      super(seed);
+    }
+
+    @Override
+    public Double nextValue() {
+      return randomDouble();
+    }
+  }
+
   public static class StringGenerator extends RandomBinaryBase<String> {
     private static final int MAX_STRING_LENGTH = 16;
     public StringGenerator(long seed) {
@@ -197,7 +268,7 @@ public class RandomValues {
 
     @Override
     public String nextValue() {
-      int stringLength = randomInt(15) + 1;
+      int stringLength = randomPositiveInt(15) + 1;
       return randomString(stringLength);
     }
 
@@ -216,7 +287,7 @@ public class RandomValues {
     @Override
     public Binary nextValue() {
       // use a random length, but ensure it is at least a few bytes
-      int length = 5 + randomInt(buffer.length - 5);
+      int length = 5 + randomPositiveInt(buffer.length - 5);
       for (int index = 0; index < length; index++) {
         buffer[index] = (byte) randomInt();
       }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java
index d157cc3..5a5d6d4 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java
@@ -42,8 +42,13 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
+import org.apache.parquet.statistics.RandomValues.RandomBinaryBase;
+import org.apache.parquet.statistics.RandomValues.RandomValueGenerator;
 import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
@@ -51,7 +56,9 @@ import org.junit.rules.TemporaryFolder;
 
 import java.io.File;
 import java.io.IOException;
+import java.math.BigInteger;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
 import java.util.Random;
 
@@ -59,6 +66,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
 
 public class TestStatistics {
   private static final int MEGABYTE = 1 << 20;
@@ -163,9 +171,11 @@ public class TestStatistics {
     private final boolean hasNonNull;
     private final T min;
     private final T max;
+    private final Comparator<T> comparator;
 
     public StatsValidator(DataPage page) {
       Statistics<T> stats = getStatisticsFromPageHeader(page);
+      this.comparator = stats.comparator();
       this.hasNonNull = stats.hasNonNullValue();
       if (hasNonNull) {
         this.min = stats.genericGetMin();
@@ -178,8 +188,8 @@ public class TestStatistics {
 
     public void validate(T value) {
       if (hasNonNull) {
-        assertTrue("min should be <= all values", min.compareTo(value) <= 0);
-        assertTrue("min should be >= all values", max.compareTo(value) >= 0);
+        assertTrue("min should be <= all values", comparator.compare(min, value) <= 0);
+        assertTrue("min should be >= all values", comparator.compare(max, value) >= 0);
       }
     }
   }
@@ -280,7 +290,11 @@ public class TestStatistics {
     private void validateStatsForPage(DataPage page, DictionaryPage dict, ColumnDescriptor desc) {
       SingletonPageReader reader = new SingletonPageReader(dict, page);
       PrimitiveConverter converter = getValidatingConverter(page, desc.getType());
-      Statistics stats = getStatisticsFromPageHeader(page);
+      Statistics<?> stats = getStatisticsFromPageHeader(page);
+
+      assertEquals("Statistics does not use the proper comparator",
+          desc.getPrimitiveType().comparator().getClass(),
+          stats.comparator().getClass());
 
       if (stats.isEmpty()) {
         // stats are empty if num nulls = 0 and there are no non-null values
@@ -306,8 +320,8 @@ public class TestStatistics {
 
       System.err.println(String.format(
           "Validated stats min=%s max=%s nulls=%d for page=%s col=%s",
-          String.valueOf(stats.genericGetMin()),
-          String.valueOf(stats.genericGetMax()), stats.getNumNulls(), page,
+          stats.minAsString(),
+          stats.maxAsString(), stats.getNumNulls(), page,
           Arrays.toString(desc.getPath())));
     }
   }
@@ -315,92 +329,144 @@ public class TestStatistics {
   public static class DataContext extends DataGenerationContext.WriteContext {
     private static final int MAX_TOTAL_ROWS = 1000000;
 
-    private final long seed;
     private final Random random;
     private final int recordCount;
 
-    private final int fixedLength;
-    private final RandomValues.IntGenerator intGenerator;
-    private final RandomValues.LongGenerator longGenerator;
-    private final RandomValues.Int96Generator int96Generator;
-    private final RandomValues.FloatGenerator floatGenerator;
-    private final RandomValues.DoubleGenerator doubleGenerator;
-    private final RandomValues.StringGenerator stringGenerator;
-    private final RandomValues.BinaryGenerator binaryGenerator;
-    private final RandomValues.FixedGenerator fixedBinaryGenerator;
+    private final List<RandomValueGenerator<?>> randomGenerators;
 
     public DataContext(long seed, File path, int blockSize, int pageSize, boolean enableDictionary, ParquetProperties.WriterVersion version) throws IOException {
       super(path, buildSchema(seed), blockSize, pageSize, enableDictionary, true, version);
 
-      this.seed = seed;
       this.random = new Random(seed);
       this.recordCount = random.nextInt(MAX_TOTAL_ROWS);
 
-      this.fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength();
-      this.intGenerator = new RandomValues.IntGenerator(random.nextLong());
-      this.longGenerator = new RandomValues.LongGenerator(random.nextLong());
-      this.int96Generator = new RandomValues.Int96Generator(random.nextLong());
-      this.floatGenerator = new RandomValues.FloatGenerator(random.nextLong());
-      this.doubleGenerator = new RandomValues.DoubleGenerator(random.nextLong());
-      this.stringGenerator = new RandomValues.StringGenerator(random.nextLong());
-      this.binaryGenerator = new RandomValues.BinaryGenerator(random.nextLong());
-      this.fixedBinaryGenerator = new RandomValues.FixedGenerator(random.nextLong(), fixedLength);
+      int fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength();
+
+      randomGenerators = Arrays.<RandomValueGenerator<?>>asList(
+          new RandomValues.IntGenerator(random.nextLong()),
+          new RandomValues.LongGenerator(random.nextLong()),
+          new RandomValues.Int96Generator(random.nextLong()),
+          new RandomValues.FloatGenerator(random.nextLong()),
+          new RandomValues.DoubleGenerator(random.nextLong()),
+          new RandomValues.StringGenerator(random.nextLong()),
+          new RandomValues.BinaryGenerator(random.nextLong()),
+          new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
+          new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedFloatGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedDoubleGenerator(random.nextLong()),
+          new RandomValues.IntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
+          new RandomValues.UIntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
+          new RandomValues.IntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
+          new RandomValues.UIntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
+          new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
+          new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
+          new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
+          new RandomValues.BinaryGenerator(random.nextLong()),
+          new RandomValues.StringGenerator(random.nextLong()),
+          new RandomValues.StringGenerator(random.nextLong()),
+          new RandomValues.StringGenerator(random.nextLong()),
+          new RandomValues.BinaryGenerator(random.nextLong()),
+          new RandomValues.IntGenerator(random.nextLong()),
+          new RandomValues.IntGenerator(random.nextLong()),
+          new RandomValues.LongGenerator(random.nextLong()),
+          new RandomValues.LongGenerator(random.nextLong()),
+          new RandomValues.LongGenerator(random.nextLong()),
+          new RandomValues.FixedGenerator(random.nextLong(), 12)
+      );
     }
 
     private static MessageType buildSchema(long seed) {
       Random random = new Random(seed);
       int fixedBinaryLength = random.nextInt(21) + 1;
+      int fixedPrecision = calculatePrecision(fixedBinaryLength);
+      int fixedScale = fixedPrecision / 4;
+      int binaryPrecision = calculatePrecision(16);
+      int binaryScale = binaryPrecision / 4;
 
       return new MessageType("schema",
-        new PrimitiveType(OPTIONAL, INT32, "i32"),
-        new PrimitiveType(OPTIONAL, INT64, "i64"),
-        new PrimitiveType(OPTIONAL, INT96, "i96"),
-        new PrimitiveType(OPTIONAL, FLOAT, "sngl"),
-        new PrimitiveType(OPTIONAL, DOUBLE, "dbl"),
-        new PrimitiveType(OPTIONAL, BINARY, "strings"),
-        new PrimitiveType(OPTIONAL, BINARY, "binary"),
-        new PrimitiveType(OPTIONAL, FIXED_LEN_BYTE_ARRAY, fixedBinaryLength, "fixed-binary"),
-        new PrimitiveType(REQUIRED, INT32, "unconstrained-i32"),
-        new PrimitiveType(REQUIRED, INT64, "unconstrained-i64"),
-        new PrimitiveType(REQUIRED, FLOAT, "unconstrained-sngl"),
-        new PrimitiveType(REQUIRED, DOUBLE, "unconstrained-dbl")
+          new PrimitiveType(OPTIONAL, INT32, "i32"),
+          new PrimitiveType(OPTIONAL, INT64, "i64"),
+          new PrimitiveType(OPTIONAL, INT96, "i96"),
+          new PrimitiveType(OPTIONAL, FLOAT, "sngl"),
+          new PrimitiveType(OPTIONAL, DOUBLE, "dbl"),
+          new PrimitiveType(OPTIONAL, BINARY, "strings"),
+          new PrimitiveType(OPTIONAL, BINARY, "binary"),
+          new PrimitiveType(OPTIONAL, FIXED_LEN_BYTE_ARRAY, fixedBinaryLength, "fixed-binary"),
+          new PrimitiveType(REQUIRED, INT32, "unconstrained-i32"),
+          new PrimitiveType(REQUIRED, INT64, "unconstrained-i64"),
+          new PrimitiveType(REQUIRED, FLOAT, "unconstrained-sngl"),
+          new PrimitiveType(REQUIRED, DOUBLE, "unconstrained-dbl"),
+          Types.optional(INT32).as(OriginalType.INT_8).named("int8"),
+          Types.optional(INT32).as(OriginalType.UINT_8).named("uint8"),
+          Types.optional(INT32).as(OriginalType.INT_16).named("int16"),
+          Types.optional(INT32).as(OriginalType.UINT_16).named("uint16"),
+          Types.optional(INT32).as(OriginalType.INT_32).named("int32"),
+          Types.optional(INT32).as(OriginalType.UINT_32).named("uint32"),
+          Types.optional(INT64).as(OriginalType.INT_64).named("int64"),
+          Types.optional(INT64).as(OriginalType.UINT_64).named("uint64"),
+          Types.optional(INT32).as(OriginalType.DECIMAL).precision(9).scale(2).named("decimal-int32"),
+          Types.optional(INT64).as(OriginalType.DECIMAL).precision(18).scale(4).named("decimal-int64"),
+          Types.optional(FIXED_LEN_BYTE_ARRAY).length(fixedBinaryLength).as(OriginalType.DECIMAL)
+              .precision(fixedPrecision).scale(fixedScale).named("decimal-fixed"),
+          Types.optional(BINARY).as(OriginalType.DECIMAL).precision(binaryPrecision).scale(binaryScale)
+              .named("decimal-binary"),
+          Types.optional(BINARY).as(OriginalType.UTF8).named("utf8"),
+          Types.optional(BINARY).as(OriginalType.ENUM).named("enum"),
+          Types.optional(BINARY).as(OriginalType.JSON).named("json"),
+          Types.optional(BINARY).as(OriginalType.BSON).named("bson"),
+          Types.optional(INT32).as(OriginalType.DATE).named("date"),
+          Types.optional(INT32).as(OriginalType.TIME_MILLIS).named("time-millis"),
+          Types.optional(INT64).as(OriginalType.TIME_MICROS).named("time-micros"),
+          Types.optional(INT64).as(OriginalType.TIMESTAMP_MILLIS).named("timestamp-millis"),
+          Types.optional(INT64).as(OriginalType.TIMESTAMP_MICROS).named("timestamp-micros"),
+          Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named("interval")
       );
     }
 
+    private static int calculatePrecision(int byteCnt) {
+      String maxValue = BigInteger.valueOf(2L).pow(8 * byteCnt - 1).toString();
+      return maxValue.length() - 1;
+    }
+
     @Override
     public void write(ParquetWriter<Group> writer) throws IOException {
       for (int index = 0; index < recordCount; index++) {
         Group group = new SimpleGroup(super.schema);
 
-        if (!intGenerator.shouldGenerateNull()) {
-          group.append("i32", intGenerator.nextValue());
-        }
-        if (!longGenerator.shouldGenerateNull()) {
-          group.append("i64", longGenerator.nextValue());
-        }
-        if (!int96Generator.shouldGenerateNull()) {
-          group.append("i96", int96Generator.nextBinaryValue());
-        }
-        if (!floatGenerator.shouldGenerateNull()) {
-          group.append("sngl", floatGenerator.nextValue());
-        }
-        if (!doubleGenerator.shouldGenerateNull()) {
-          group.append("dbl", doubleGenerator.nextValue());
-        }
-        if (!stringGenerator.shouldGenerateNull()) {
-          group.append("strings", stringGenerator.nextBinaryValue());
-        }
-        if (!binaryGenerator.shouldGenerateNull()) {
-          group.append("binary", binaryGenerator.nextBinaryValue());
-        }
-        if (!fixedBinaryGenerator.shouldGenerateNull()) {
-          group.append("fixed-binary", fixedBinaryGenerator.nextBinaryValue());
+        for (int column = 0, columnCnt = schema.getFieldCount(); column < columnCnt; ++column) {
+          Type type = schema.getType(column);
+          RandomValueGenerator<?> generator = randomGenerators.get(column);
+          if (type.isRepetition(OPTIONAL) && generator.shouldGenerateNull()) {
+            continue;
+          }
+          switch (type.asPrimitiveType().getPrimitiveTypeName()) {
+          case BINARY:
+          case FIXED_LEN_BYTE_ARRAY:
+          case INT96:
+            group.append(type.getName(), ((RandomBinaryBase<?>) generator).nextBinaryValue());
+            break;
+          case INT32:
+            group.append(type.getName(), (Integer) generator.nextValue());
+            break;
+          case INT64:
+            group.append(type.getName(), (Long) generator.nextValue());
+            break;
+          case FLOAT:
+            group.append(type.getName(), (Float) generator.nextValue());
+            break;
+          case DOUBLE:
+            group.append(type.getName(), (Double) generator.nextValue());
+            break;
+          case BOOLEAN:
+            group.append(type.getName(), (Boolean) generator.nextValue());
+            break;
+          }
         }
-        group.append("unconstrained-i32", random.nextInt());
-        group.append("unconstrained-i64", random.nextLong());
-        group.append("unconstrained-sngl", random.nextFloat());
-        group.append("unconstrained-dbl", random.nextDouble());
-
         writer.write(group);
       }
     }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java
index 0439686..66b804c 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java
@@ -19,8 +19,6 @@
 package org.apache.parquet.hadoop.thrift;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.Arrays;
@@ -53,6 +51,7 @@ import org.junit.Test;
 import org.apache.parquet.example.data.Group;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.TestUtils;
 import org.apache.parquet.hadoop.example.GroupReadSupport;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 
@@ -122,21 +121,21 @@ public class TestThriftToParquetFileWriter {
         for(ColumnChunkMetaData cmd: bmd.getColumns()) {
           switch(cmd.getType()) {
             case INT32:
-              assertTrue(intStatsSmall.equals((IntStatistics)cmd.getStatistics()));
+              TestUtils.assertStatsValuesEqual(intStatsSmall, cmd.getStatistics());
               break;
             case INT64:
-              assertTrue(longStatsSmall.equals((LongStatistics)cmd.getStatistics()));
+              TestUtils.assertStatsValuesEqual(longStatsSmall, cmd.getStatistics());
               break;
             case DOUBLE:
-              assertTrue(doubleStatsSmall.equals((DoubleStatistics)cmd.getStatistics()));
+              TestUtils.assertStatsValuesEqual(doubleStatsSmall, cmd.getStatistics());
               break;
             case BOOLEAN:
-              assertTrue(boolStats.equals((BooleanStatistics)cmd.getStatistics()));
+              TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics());
               break;
             case BINARY:
               // there is also info_string that has no statistics
               if(cmd.getPath().toString() == "[test_string]")
-                assertTrue(binaryStatsSmall.equals((BinaryStatistics)cmd.getStatistics()));
+                TestUtils.assertStatsValuesEqual(binaryStatsSmall, cmd.getStatistics());
               break;
            }
         }
@@ -171,21 +170,21 @@ public class TestThriftToParquetFileWriter {
              case INT32:
                // testing the correct limits of an int32, there are also byte and short, tested earlier
                if(cmd.getPath().toString() == "[test_i32]")
-                 assertTrue(intStatsLarge.equals((IntStatistics)cmd.getStatistics()));
+                 TestUtils.assertStatsValuesEqual(intStatsLarge, cmd.getStatistics());
                break;
              case INT64:
-               assertTrue(longStatsLarge.equals((LongStatistics)cmd.getStatistics()));
+               TestUtils.assertStatsValuesEqual(longStatsLarge, cmd.getStatistics());
                break;
              case DOUBLE:
-               assertTrue(doubleStatsLarge.equals((DoubleStatistics)cmd.getStatistics()));
+               TestUtils.assertStatsValuesEqual(doubleStatsLarge, cmd.getStatistics());
                break;
              case BOOLEAN:
-               assertTrue(boolStats.equals((BooleanStatistics)cmd.getStatistics()));
+               TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics());
                break;
              case BINARY:
                // there is also info_string that has no statistics
                if(cmd.getPath().toString() == "[test_string]")
-                 assertTrue(binaryStatsLarge.equals((BinaryStatistics)cmd.getStatistics()));
+                 TestUtils.assertStatsValuesEqual(binaryStatsLarge, cmd.getStatistics());
                break;
            }
         }


Mime
View raw message