parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1297: SchemaConverter should not convert from Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow (#477)
Date Sun, 13 May 2018 17:31:05 GMT
This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new b635beb  PARQUET-1297: SchemaConverter should not convert from Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow (#477)
b635beb is described below

commit b635beb6efc07a97c143775c78a32d42b3b73c8e
Author: Masayuki Takahashi <masayuki038@gmail.com>
AuthorDate: Mon May 14 02:31:02 2018 +0900

    PARQUET-1297: SchemaConverter should not convert from Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow (#477)
    
    Arrow defines its 'Timestamp' type as follows:
    {
      "name" : "timestamp",
      "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
    }
    http://arrow.apache.org/docs/metadata.html
    
    However, Parquet supports only 'TIMESTAMP_MILLIS' and 'TIMESTAMP_MICROS':
    https://github.com/Apache/parquet-format/blob/master/LogicalTypes.md
    
    Therefore, SchemaConverter should not convert Arrow's Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) to Parquet; it now throws UnsupportedOperationException for those units.
    
    Related:
    https://issues.apache.org/jira/browse/PARQUET-1285
    
    Author: Masayuki Takahashi <masayuki038@gmail.com>
---
 .../parquet/arrow/schema/SchemaConverter.java      | 17 +++---
 .../parquet/arrow/schema/TestSchemaConverter.java  | 63 ++++++++++++++++++++++
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index f298558..a7df48c 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -27,6 +27,7 @@ import static org.apache.parquet.schema.OriginalType.INT_32;
 import static org.apache.parquet.schema.OriginalType.INT_64;
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -259,7 +260,13 @@ public class SchemaConverter {
 
       @Override
       public TypeMapping visit(Timestamp type) {
-        return primitive(INT64, TIMESTAMP_MILLIS);
+        TimeUnit timeUnit = type.getUnit();
+        if (timeUnit == TimeUnit.MILLISECOND) {
+          return primitive(INT64, TIMESTAMP_MILLIS);
+        } else if (timeUnit == TimeUnit.MICROSECOND) {
+          return primitive(INT64, TIMESTAMP_MICROS);
+        }
+        throw new UnsupportedOperationException("Unsupported type " + type);
       }
 
       /**
@@ -415,14 +422,9 @@ public class SchemaConverter {
             return decimal(type.getDecimalMetadata());
           case DATE:
             return field(new ArrowType.Date(DateUnit.DAY));
-          case TIMESTAMP_MICROS:
-            return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
-          case TIMESTAMP_MILLIS:
-            return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
           case TIME_MILLIS:
             return field(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
           default:
-          case TIME_MICROS:
           case INT_64:
           case UINT_64:
           case UTF8:
@@ -433,6 +435,9 @@ public class SchemaConverter {
           case LIST:
           case MAP:
           case MAP_KEY_VALUE:
+          case TIMESTAMP_MICROS:
+          case TIMESTAMP_MILLIS:
+          case TIME_MICROS:
             throw new IllegalArgumentException("illegal type " + type);
         }
       }
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index 4c3da35..2d1f028 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -27,6 +27,7 @@ import static org.apache.parquet.schema.OriginalType.INT_32;
 import static org.apache.parquet.schema.OriginalType.INT_64;
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -413,4 +414,66 @@ public class TestSchemaConverter {
     converter.fromParquet(Types.buildMessage()
       .addField(Types.optional(INT32).as(TIME_MICROS).named("a")).named("root"));
   }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimestampSecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testArrowTimestampMillisecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+  }
+
+  @Test
+  public void testArrowTimestampMicrosecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root"));
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimestampNanosecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testParquetInt64TimestampMillisToArrow() {
+    MessageType parquet = Types.buildMessage()
+      .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+    ));
+    Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test
+  public void testParquetInt64TimestampMicrosToArrow() {
+    MessageType parquet = Types.buildMessage()
+      .addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+    ));
+    Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt32TimestampMillisToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      .addField(Types.optional(INT32).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt32TimestampMicrosToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      .addField(Types.optional(INT32).as(TIMESTAMP_MICROS).named("a")).named("root"));
+  }
 }

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.

Mime
View raw message