parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594)
Date Sun, 27 Jan 2019 20:25:57 GMT
This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 00a7a47  PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594)
00a7a47 is described below

commit 00a7a470dbf73d6ae3bdd0774706abcda353b178
Author: Yongyan Wang <36677000+yongyanw@users.noreply.github.com>
AuthorDate: Sun Jan 27 12:25:53 2019 -0800

    PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594)
    
    PARQUET-1504: Add an option to convert Parquet Int96 to Arrow Timestamp
---
 .../parquet/arrow/schema/SchemaConverter.java      | 16 ++++++++++++++--
 .../parquet/arrow/schema/TestSchemaConverter.java  | 22 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index 0bfb888..6275ca3 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -86,10 +86,19 @@ import org.apache.parquet.schema.Types.GroupBuilder;
  */
 public class SchemaConverter {
 
+  // Indicates if Int96 should be converted to Arrow Timestamp
+  private final boolean convertInt96ToArrowTimestamp;
+
   /**
    * For when we'll need this to be configurable
    */
   public SchemaConverter() {
+    this(false);
+  }
+
+  // TODO(PARQUET-1511): pass the parameters in a configuration object
+  public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
+    this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
   }
 
   /**
@@ -492,8 +501,11 @@ public class SchemaConverter {
 
       @Override
       public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        // Possibly timestamp
-        return field(new ArrowType.Binary());
+        if (convertInt96ToArrowTimestamp) {
+          return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
+        } else {
+          return field(new ArrowType.Binary());
+        }
       }
 
       @Override
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index e21f36c..764621a 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -47,6 +47,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LE
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 
 import java.io.IOException;
 import java.util.List;
@@ -439,6 +440,27 @@ public class TestSchemaConverter {
     Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
   }
 
+  @Test
+  public void testParquetInt96ToArrowBinary() {
+    MessageType parquet = Types.buildMessage()
+      .addField(Types.optional(INT96).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Binary())
+    ));
+    Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test
+  public void testParquetInt96ToArrowTimestamp() {
+    final SchemaConverter converterInt96ToTimestamp = new SchemaConverter(true);
+    MessageType parquet = Types.buildMessage()
+      .addField(Types.optional(INT96).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null))
+    ));
+    Assert.assertEquals(expected, converterInt96ToTimestamp.fromParquet(parquet).getArrowSchema());
+  }
+
   @Test(expected = IllegalStateException.class)
   public void testParquetInt64TimeMillisToArrow() {
     converter.fromParquet(Types.buildMessage()


Mime
View raw message