parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
Date Wed, 09 Jan 2019 12:42:06 GMT
This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 797e32a  PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
797e32a is described below

commit 797e32aca0eadd1d460e5f5cd477e37bc828b67d
Author: nandorKollar <nandorKollar@users.noreply.github.com>
AuthorDate: Wed Jan 9 13:42:00 2019 +0100

    PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
---
 .../parquet/schema/LogicalTypeAnnotation.java      |  6 ++
 .../format/converter/ParquetMetadataConverter.java |  6 ++
 .../converter/TestParquetMetadataConverter.java    | 80 ++++++++++++++++++----
 3 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 5f61ed6..4472376 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -555,6 +555,9 @@ public abstract class LogicalTypeAnnotation {
     @Override
     @InterfaceAudience.Private
     public OriginalType toOriginalType() {
+      if (!isAdjustedToUTC) {
+        return null;
+      }
       switch (unit) {
         case MILLIS:
           return OriginalType.TIME_MILLIS;
@@ -634,6 +637,9 @@ public abstract class LogicalTypeAnnotation {
     @Override
     @InterfaceAudience.Private
     public OriginalType toOriginalType() {
+      if (!isAdjustedToUTC) {
+        return null;
+      }
       switch (unit) {
         case MILLIS:
           return OriginalType.TIMESTAMP_MILLIS;
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index efb0608..fb0ca7b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -309,6 +309,9 @@ public class ParquetMetadataConverter {
 
     @Override
     public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+      if (!timeLogicalType.isAdjustedToUTC()) {
+        return empty();
+      }
       switch (timeLogicalType.getUnit()) {
         case MILLIS:
           return of(ConvertedType.TIME_MILLIS);
@@ -323,6 +326,9 @@ public class ParquetMetadataConverter {
 
     @Override
     public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+      if (!timestampLogicalType.isAdjustedToUTC()) {
+        return empty();
+      }
       switch (timestampLogicalType.getUnit()) {
         case MICROS:
           return of(ConvertedType.TIMESTAMP_MICROS);
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 358a29a..65244f4 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,18 @@ package org.apache.parquet.format.converter;
 
 import static java.util.Collections.emptyList;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
@@ -196,40 +208,40 @@ public class TestParquetMetadataConverter {
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
     MessageType expected = Types.buildMessage()
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timestampType(false, MILLIS))
       .named("aTimestampNonUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timestampType(true, MILLIS))
       .named("aTimestampUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timestampType(false, MICROS))
       .named("aTimestampNonUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timestampType(true, MICROS))
       .named("aTimestampUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timestampType(false, NANOS))
       .named("aTimestampNonUtcNanos")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timestampType(true, NANOS))
       .named("aTimestampUtcNanos")
       .required(PrimitiveTypeName.INT32)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timeType(false, MILLIS))
       .named("aTimeNonUtcMillis")
       .required(PrimitiveTypeName.INT32)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timeType(true, MILLIS))
       .named("aTimeUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timeType(false, MICROS))
       .named("aTimeNonUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timeType(true, MICROS))
       .named("aTimeUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timeType(false, NANOS))
       .named("aTimeNonUtcNanos")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timeType(true, NANOS))
       .named("aTimeUtcNanos")
       .named("Message");
     List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
@@ -238,6 +250,48 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
+  public void testLogicalToConvertedTypeConversion() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+
+    assertEquals(ConvertedType.UTF8, parquetMetadataConverter.convertToConvertedType(stringType()));
+    assertEquals(ConvertedType.ENUM, parquetMetadataConverter.convertToConvertedType(enumType()));
+
+    assertEquals(ConvertedType.INT_8, parquetMetadataConverter.convertToConvertedType(intType(8, true)));
+    assertEquals(ConvertedType.INT_16, parquetMetadataConverter.convertToConvertedType(intType(16, true)));
+    assertEquals(ConvertedType.INT_32, parquetMetadataConverter.convertToConvertedType(intType(32, true)));
+    assertEquals(ConvertedType.INT_64, parquetMetadataConverter.convertToConvertedType(intType(64, true)));
+    assertEquals(ConvertedType.UINT_8, parquetMetadataConverter.convertToConvertedType(intType(8, false)));
+    assertEquals(ConvertedType.UINT_16, parquetMetadataConverter.convertToConvertedType(intType(16, false)));
+    assertEquals(ConvertedType.UINT_32, parquetMetadataConverter.convertToConvertedType(intType(32, false)));
+    assertEquals(ConvertedType.UINT_64, parquetMetadataConverter.convertToConvertedType(intType(64, false)));
+    assertEquals(ConvertedType.DECIMAL, parquetMetadataConverter.convertToConvertedType(decimalType(8, 16)));
+
+    assertEquals(ConvertedType.TIMESTAMP_MILLIS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MILLIS)));
+    assertEquals(ConvertedType.TIMESTAMP_MICROS, parquetMetadataConverter.convertToConvertedType(timestampType(true, MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(true, NANOS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MILLIS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, NANOS)));
+
+    assertEquals(ConvertedType.TIME_MILLIS, parquetMetadataConverter.convertToConvertedType(timeType(true, MILLIS)));
+    assertEquals(ConvertedType.TIME_MICROS, parquetMetadataConverter.convertToConvertedType(timeType(true, MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(true, NANOS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MILLIS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, NANOS)));
+
+    assertEquals(ConvertedType.DATE, parquetMetadataConverter.convertToConvertedType(dateType()));
+
+    assertEquals(ConvertedType.INTERVAL, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()));
+    assertEquals(ConvertedType.JSON, parquetMetadataConverter.convertToConvertedType(jsonType()));
+    assertEquals(ConvertedType.BSON, parquetMetadataConverter.convertToConvertedType(bsonType()));
+
+    assertEquals(ConvertedType.LIST, parquetMetadataConverter.convertToConvertedType(listType()));
+    assertEquals(ConvertedType.MAP, parquetMetadataConverter.convertToConvertedType(mapType()));
+    assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
+  }
+
+  @Test
   public void testEnumEquivalence() {
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
     for (org.apache.parquet.column.Encoding encoding : org.apache.parquet.column.Encoding.values()) {
@@ -1024,7 +1078,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(200l);
     stats.updateStats(500l);
     builder.add(stats);
-    org.apache.parquet.format.ColumnIndex parquetColumnIndex = 
+    org.apache.parquet.format.ColumnIndex parquetColumnIndex =
         ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
     ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
     assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());


Mime
View raw message