parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ziva...@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
Date Tue, 07 Aug 2018 15:56:44 GMT
This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 55e9497  PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
55e9497 is described below

commit 55e94974e0547085a66c6242336e56230f996d52
Author: nandorKollar <nandorKollar@users.noreply.github.com>
AuthorDate: Tue Aug 7 17:56:42 2018 +0200

    PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
---
 .../format/converter/ParquetMetadataConverter.java | 15 +++---
 .../converter/TestParquetMetadataConverter.java    | 61 +++++++++++++++++++++-
 2 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index d222505..1442910 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -799,7 +799,7 @@ public class ParquetMetadataConverter {
   }
 
   // Visible for testing
-  LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement) {
+  LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) {
     switch (type) {
       case UTF8:
         return LogicalTypeAnnotation.stringType();
@@ -852,7 +852,7 @@ public class ParquetMetadataConverter {
     }
   }
 
-  LogicalTypeAnnotation getOriginalType(LogicalType type) {
+  LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
     switch (type.getSetField()) {
       case MAP:
         return LogicalTypeAnnotation.mapType();
@@ -1194,12 +1194,15 @@ public class ParquetMetadataConverter {
       }
 
       if (schemaElement.isSetLogicalType()) {
-        childBuilder.as(getOriginalType(schemaElement.logicalType));
+        childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType));
       }
       if (schemaElement.isSetConverted_type()) {
-        LogicalTypeAnnotation originalType = getOriginalType(schemaElement.converted_type, schemaElement);
-        LogicalTypeAnnotation newLogicalType = schemaElement.isSetLogicalType() ? getOriginalType(schemaElement.logicalType) : null;
-        if (!originalType.equals(newLogicalType)) {
+        OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType();
+        OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ?
+           getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null;
+        if (!originalType.equals(newOriginalType)) {
+          LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.",
+            schemaElement.converted_type, schemaElement.logicalType);
           childBuilder.as(originalType);
         }
       }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 1474525..d1a3a3c 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,8 @@ package org.apache.parquet.format.converter;
 
 import static java.util.Collections.emptyList;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -163,6 +165,61 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
+  public void testIncompatibleLogicalAndConvertedTypes() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+    MessageType schema = Types.buildMessage()
+      .required(PrimitiveTypeName.BINARY)
+      .as(OriginalType.DECIMAL).precision(9).scale(2)
+      .named("aBinary")
+      .named("Message");
+    MessageType expected = Types.buildMessage()
+      .required(PrimitiveTypeName.BINARY)
+      .as(LogicalTypeAnnotation.jsonType())
+      .named("aBinary")
+      .named("Message");
+
+    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(schema);
+    // Set converted type field to a different type to verify that in case of mismatch, it overrides logical type
+    parquetSchema.get(1).setConverted_type(ConvertedType.JSON);
+    MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+    assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testTimeLogicalTypes() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+    MessageType expected = Types.buildMessage()
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimestampNonUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimestampUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimestampNonUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimestampUtcMicros")
+      .required(PrimitiveTypeName.INT32)
+      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimeNonUtcMillis")
+      .required(PrimitiveTypeName.INT32)
+      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimeUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimeNonUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimeUtcMicros")
+      .named("Message");
+    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
+    MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+    assertEquals(expected, schema);
+  }
+
+  @Test
   public void testEnumEquivalence() {
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
     for (org.apache.parquet.column.Encoding encoding : org.apache.parquet.column.Encoding.values()) {
@@ -184,11 +241,11 @@ public class TestParquetMetadataConverter {
       assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type)));
     }
     for (OriginalType original : OriginalType.values()) {
-      assertEquals(original, parquetMetadataConverter.getOriginalType(
+      assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation(
         parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original, null)), null).toOriginalType());
     }
     for (ConvertedType converted : ConvertedType.values()) {
-      assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted, null)));
+      assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted, null)));
     }
   }
 


Mime
View raw message