This is an automated email from the ASF dual-hosted git repository.
zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 55e9497 PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
55e9497 is described below
commit 55e94974e0547085a66c6242336e56230f996d52
Author: nandorKollar <nandorKollar@users.noreply.github.com>
AuthorDate: Tue Aug 7 17:56:42 2018 +0200
PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
---
.../format/converter/ParquetMetadataConverter.java | 15 +++---
.../converter/TestParquetMetadataConverter.java | 61 +++++++++++++++++++++-
2 files changed, 68 insertions(+), 8 deletions(-)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index d222505..1442910 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -799,7 +799,7 @@ public class ParquetMetadataConverter {
}
// Visible for testing
- LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement)
{
+ LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement)
{
switch (type) {
case UTF8:
return LogicalTypeAnnotation.stringType();
@@ -852,7 +852,7 @@ public class ParquetMetadataConverter {
}
}
- LogicalTypeAnnotation getOriginalType(LogicalType type) {
+ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
switch (type.getSetField()) {
case MAP:
return LogicalTypeAnnotation.mapType();
@@ -1194,12 +1194,15 @@ public class ParquetMetadataConverter {
}
if (schemaElement.isSetLogicalType()) {
- childBuilder.as(getOriginalType(schemaElement.logicalType));
+ childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType));
}
if (schemaElement.isSetConverted_type()) {
- LogicalTypeAnnotation originalType = getOriginalType(schemaElement.converted_type,
schemaElement);
- LogicalTypeAnnotation newLogicalType = schemaElement.isSetLogicalType() ? getOriginalType(schemaElement.logicalType)
: null;
- if (!originalType.equals(newLogicalType)) {
+ OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type,
schemaElement).toOriginalType();
+ OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType)
!= null) ?
+ getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null;
+ if (!originalType.equals(newOriginalType)) {
+ LOG.warn("Converted type and logical type metadata mismatch (convertedType: {},
logical type: {}). Using value in converted type.",
+ schemaElement.converted_type, schemaElement.logicalType);
childBuilder.as(originalType);
}
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 1474525..d1a3a3c 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,8 @@ package org.apache.parquet.format.converter;
import static java.util.Collections.emptyList;
import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -163,6 +165,61 @@ public class TestParquetMetadataConverter {
}
@Test
+ public void testIncompatibleLogicalAndConvertedTypes() {
+ ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(OriginalType.DECIMAL).precision(9).scale(2)
+ .named("aBinary")
+ .named("Message");
+ MessageType expected = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.jsonType())
+ .named("aBinary")
+ .named("Message");
+
+ List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(schema);
+ // Set converted type field to a different type to verify that in case of mismatch, it
overrides logical type
+ parquetSchema.get(1).setConverted_type(ConvertedType.JSON);
+ MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testTimeLogicalTypes() {
+ ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+ MessageType expected = Types.buildMessage()
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimestampNonUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimestampUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimestampNonUtcMicros")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimestampUtcMicros")
+ .required(PrimitiveTypeName.INT32)
+ .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimeNonUtcMillis")
+ .required(PrimitiveTypeName.INT32)
+ .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimeUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimeNonUtcMicros")
+ .required(PrimitiveTypeName.INT64)
+ .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimeUtcMicros")
+ .named("Message");
+ List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
+ MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+ assertEquals(expected, schema);
+ }
+
+ @Test
public void testEnumEquivalence() {
ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
for (org.apache.parquet.column.Encoding encoding : org.apache.parquet.column.Encoding.values())
{
@@ -184,11 +241,11 @@ public class TestParquetMetadataConverter {
assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type)));
}
for (OriginalType original : OriginalType.values()) {
- assertEquals(original, parquetMetadataConverter.getOriginalType(
+ assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation(
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original,
null)), null).toOriginalType());
}
for (ConvertedType converted : ConvertedType.values()) {
- assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted,
null)));
+ assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted,
null)));
}
}
|