parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
Date Sat, 21 Apr 2018 13:58:33 GMT
This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new af977ad  PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
af977ad is described below

commit af977adc43a071a09652fea4ce3deba2d5b8d171
Author: Masayuki Takahashi <masayuki038@gmail.com>
AuthorDate: Sat Apr 21 14:58:35 2018 +0100

    PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
    
    When I converted a Parquet (1.9.1-SNAPSHOT) schema to Arrow (0.4.0) with SchemaConverter,
the following exception was raised.
    ```
    java.lang.NoClassDefFoundError: org/apache/arrow/vector/types/pojo/ArrowType$Struct_
    
    	at net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:67)
    	at net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:40)
    	at net.wrap_trap.parquet_arrow.ParquetToArrowConverterTest.parquetToArrowConverterTest(ParquetToArrowConverterTest.java:27)
    ```
    
    The reason is that SchemaConverter refers to Apache Arrow 0.1.0.
    This change upgrades the Apache Arrow version to 0.8.0 (latest) for SchemaConverter.
    
    Author: Masayuki Takahashi <masayuki038@gmail.com>
    
    Closes #443 from masayuki038/PARQUET-1128 and squashes the following commits:
    
    8ba47813 [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow version to
0.8.0 for SchemaConverter
    b80d793a [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow version to
0.8.0 for SchemaConverter
---
 parquet-arrow/pom.xml                              |   2 +-
 .../parquet/arrow/schema/SchemaConverter.java      | 227 +++++++-------
 .../parquet/arrow/schema/TestSchemaConverter.java  | 344 +++++++++++----------
 3 files changed, 299 insertions(+), 274 deletions(-)

diff --git a/parquet-arrow/pom.xml b/parquet-arrow/pom.xml
index de31e16..232167e 100644
--- a/parquet-arrow/pom.xml
+++ b/parquet-arrow/pom.xml
@@ -33,7 +33,7 @@
   <url>https://parquet.apache.org</url>
 
   <properties>
-    <arrow.version>0.1.0</arrow.version>
+    <arrow.version>0.8.0</arrow.version>
   </properties>
 
   <dependencies>
diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index cf4ec0d..1d69c45 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -47,8 +47,8 @@ import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
 import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
@@ -59,7 +59,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
 import org.apache.arrow.vector.types.pojo.ArrowType.Int;
 import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
 import org.apache.arrow.vector.types.pojo.ArrowType.Null;
-import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
 import org.apache.arrow.vector.types.pojo.ArrowType.Time;
 import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
@@ -141,13 +141,22 @@ public class SchemaConverter {
       }
 
       @Override
-      public TypeMapping visit(Struct_ type) {
+      public TypeMapping visit(Struct type) {
         List<TypeMapping> parquetTypes = fromArrow(children);
         return new StructTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName),
parquetTypes);
       }
 
       @Override
       public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+        return createListTypeMapping();
+      }
+
+      @Override
+      public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList
type) {
+        return createListTypeMapping();
+      }
+
+      private ListTypeMapping createListTypeMapping() {
         if (children.size() != 1) {
           throw new IllegalArgumentException("list fields must have exactly one child: "
+ field);
         }
@@ -167,31 +176,31 @@ public class SchemaConverter {
       public TypeMapping visit(Int type) {
         boolean signed = type.getIsSigned();
         switch (type.getBitWidth()) {
-        case 8:
-          return primitive(INT32, signed ? INT_8 : UINT_8);
-        case 16:
-          return primitive(INT32, signed ? INT_16 : UINT_16);
-        case 32:
-          return primitive(INT32, signed ? INT_32 : UINT_32);
-        case 64:
-          return primitive(INT64, signed ? INT_64 : UINT_64);
-        default:
-          throw new IllegalArgumentException("Illegal int type: " + field);
+          case 8:
+            return primitive(INT32, signed ? INT_8 : UINT_8);
+          case 16:
+            return primitive(INT32, signed ? INT_16 : UINT_16);
+          case 32:
+            return primitive(INT32, signed ? INT_32 : UINT_32);
+          case 64:
+            return primitive(INT64, signed ? INT_64 : UINT_64);
+          default:
+            throw new IllegalArgumentException("Illegal int type: " + field);
         }
       }
 
       @Override
       public TypeMapping visit(FloatingPoint type) {
         switch (type.getPrecision()) {
-        case Precision.HALF:
-          // TODO(PARQUET-757): original type HalfFloat
-          return primitive(FLOAT);
-        case Precision.SINGLE:
-          return primitive(FLOAT);
-        case Precision.DOUBLE:
-          return primitive(DOUBLE);
-        default:
-          throw new IllegalArgumentException("Illegal float type: " + field);
+          case HALF:
+            // TODO(PARQUET-757): original type HalfFloat
+            return primitive(FLOAT);
+          case SINGLE:
+            return primitive(FLOAT);
+          case DOUBLE:
+            return primitive(DOUBLE);
+          default:
+            throw new IllegalArgumentException("Illegal float type: " + field);
         }
       }
 
@@ -336,7 +345,7 @@ public class SchemaConverter {
     OriginalType ot = type.getOriginalType();
     if (ot == null) {
       List<TypeMapping> typeMappings = fromParquet(type.getFields());
-      Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct_(), fields(typeMappings));
+      Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
       return new StructTypeMapping(arrowField, type, typeMappings);
     } else {
       switch (ot) {
@@ -366,12 +375,12 @@ public class SchemaConverter {
 
       @Override
       public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException
{
-        return field(new ArrowType.FloatingPoint(Precision.SINGLE));
+        return field(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
       }
 
       @Override
       public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException
{
-        return field(new ArrowType.FloatingPoint(Precision.DOUBLE));
+        return field(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
       }
 
       @Override
@@ -381,41 +390,41 @@ public class SchemaConverter {
           return integer(32, true);
         }
         switch (ot) {
-        case INT_8:
-          return integer(8, true);
-        case INT_16:
-          return integer(16, true);
-        case INT_32:
-          return integer(32, true);
-        case UINT_8:
-          return integer(8, false);
-        case UINT_16:
-          return integer(16, false);
-        case UINT_32:
-          return integer(32, false);
-        case DECIMAL:
-          return decimal(type.getDecimalMetadata());
-        case DATE:
-          return field(new ArrowType.Date());
-        case TIMESTAMP_MICROS:
-          return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
-        case TIMESTAMP_MILLIS:
-          return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
-        case TIME_MILLIS:
-          return field(new ArrowType.Time());
-        default:
-        case TIME_MICROS:
-        case INT_64:
-        case UINT_64:
-        case UTF8:
-        case ENUM:
-        case BSON:
-        case INTERVAL:
-        case JSON:
-        case LIST:
-        case MAP:
-        case MAP_KEY_VALUE:
-          throw new IllegalArgumentException("illegal type " + type);
+          case INT_8:
+            return integer(8, true);
+          case INT_16:
+            return integer(16, true);
+          case INT_32:
+            return integer(32, true);
+          case UINT_8:
+            return integer(8, false);
+          case UINT_16:
+            return integer(16, false);
+          case UINT_32:
+            return integer(32, false);
+          case DECIMAL:
+            return decimal(type.getDecimalMetadata());
+          case DATE:
+            return field(new ArrowType.Date(DateUnit.DAY));
+          case TIMESTAMP_MICROS:
+            return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND,
"UTC"));
+          case TIMESTAMP_MILLIS:
+            return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
"UTC"));
+          case TIME_MILLIS:
+            return field(new ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
32));
+          default:
+          case TIME_MICROS:
+          case INT_64:
+          case UINT_64:
+          case UTF8:
+          case ENUM:
+          case BSON:
+          case INTERVAL:
+          case JSON:
+          case LIST:
+          case MAP:
+          case MAP_KEY_VALUE:
+            throw new IllegalArgumentException("illegal type " + type);
         }
       }
 
@@ -426,43 +435,42 @@ public class SchemaConverter {
           return integer(64, true);
         }
         switch (ot) {
-        case INT_8:
-          return integer(8, true);
-        case INT_16:
-          return integer(16, true);
-        case INT_32:
-          return integer(32, true);
-        case INT_64:
-          return integer(64, true);
-        case UINT_8:
-          return integer(8, false);
-        case UINT_16:
-          return integer(16, false);
-        case UINT_32:
-          return integer(32, false);
-        case UINT_64:
-          return integer(64, false);
-        case DECIMAL:
-          return decimal(type.getDecimalMetadata());
-        case DATE:
-          return field(new ArrowType.Date());
-        case TIMESTAMP_MICROS:
-          return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
-        case TIMESTAMP_MILLIS:
-          return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
-        case TIME_MILLIS:
-          return field(new ArrowType.Time());
-        default:
-        case TIME_MICROS:
-        case UTF8:
-        case ENUM:
-        case BSON:
-        case INTERVAL:
-        case JSON:
-        case LIST:
-        case MAP:
-        case MAP_KEY_VALUE:
-          throw new IllegalArgumentException("illegal type " + type);
+          case INT_8:
+            return integer(8, true);
+          case INT_16:
+            return integer(16, true);
+          case INT_32:
+            return integer(32, true);
+          case INT_64:
+            return integer(64, true);
+          case UINT_8:
+            return integer(8, false);
+          case UINT_16:
+            return integer(16, false);
+          case UINT_32:
+            return integer(32, false);
+          case UINT_64:
+            return integer(64, false);
+          case DECIMAL:
+            return decimal(type.getDecimalMetadata());
+          case DATE:
+            return field(new ArrowType.Date(DateUnit.DAY));
+          case TIMESTAMP_MICROS:
+            return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND,
"UTC"));
+          case TIMESTAMP_MILLIS:
+            return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
"UTC"));
+          default:
+          case TIME_MICROS:
+          case UTF8:
+          case ENUM:
+          case BSON:
+          case INTERVAL:
+          case JSON:
+          case LIST:
+          case MAP:
+          case MAP_KEY_VALUE:
+          case TIME_MILLIS:
+            throw new IllegalArgumentException("illegal type " + type);
         }
       }
 
@@ -489,12 +497,12 @@ public class SchemaConverter {
           return field(new ArrowType.Binary());
         }
         switch (ot) {
-        case UTF8:
-          return field(new ArrowType.Utf8());
-        case DECIMAL:
-          return decimal(type.getDecimalMetadata());
-        default:
-          throw new IllegalArgumentException("illegal type " + type);
+          case UTF8:
+            return field(new ArrowType.Utf8());
+          case DECIMAL:
+            return decimal(type.getDecimalMetadata());
+          default:
+            throw new IllegalArgumentException("illegal type " + type);
         }
       }
 
@@ -545,7 +553,7 @@ public class SchemaConverter {
       }
 
       @Override
-      public TypeMapping visit(Struct_ type) {
+      public TypeMapping visit(Struct type) {
         if (parquetField.isPrimitive()) {
           throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
         }
@@ -555,6 +563,15 @@ public class SchemaConverter {
 
       @Override
       public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+        return createListTypeMapping(type);
+      }
+
+      @Override
+      public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList
type) {
+        return createListTypeMapping(type);
+      }
+
+      private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
         if (arrowField.getChildren().size() != 1) {
           throw new IllegalArgumentException("Invalid list type: " + type);
         }
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index ec2b807..654f773 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -43,11 +43,11 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
 
 import java.io.IOException;
 import java.util.List;
+import org.apache.arrow.vector.types.IntervalUnit;
 
-import org.apache.arrow.flatbuf.IntervalUnit;
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
-import org.apache.arrow.flatbuf.UnionMode;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
@@ -79,159 +79,167 @@ public class TestSchemaConverter {
   }
 
   private final Schema complexArrowSchema = new Schema(asList(
-      field("a", false, new ArrowType.Int(8, true)),
-      field("b", new ArrowType.Struct_(),
-          field("c", new ArrowType.Int(16, true)),
-          field("d", new ArrowType.Utf8())),
-      field("e", new ArrowType.List(), field(null, new ArrowType.Date())),
-      field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
-      field("g", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
-      field("h", new ArrowType.Interval(IntervalUnit.DAY_TIME))
-      ));
+    field("a", false, new ArrowType.Int(8, true)),
+    field("b", new ArrowType.Struct(),
+      field("c", new ArrowType.Int(16, true)),
+      field("d", new ArrowType.Utf8())),
+    field("e", new ArrowType.List(), field(null, new ArrowType.Date(DateUnit.DAY))),
+    field("f", new ArrowType.FixedSizeList(1), field(null, new ArrowType.Date(DateUnit.DAY))),
+    field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+    field("h", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
"UTC")),
+    field("i", new ArrowType.Interval(IntervalUnit.DAY_TIME))
+  ));
   private final MessageType complexParquetSchema = Types.buildMessage()
-      .addField(Types.optional(INT32).as(INT_8).named("a"))
-      .addField(Types.optionalGroup()
-          .addField(Types.optional(INT32).as(INT_16).named("c"))
-          .addField(Types.optional(BINARY).as(UTF8).named("d"))
-          .named("b"))
-      .addField(Types.optionalList().
-          setElementType(Types.optional(INT32).as(DATE).named("element"))
-          .named("e"))
-      .addField(Types.optional(FLOAT).named("f"))
-      .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("g"))
-      .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("h"))
-      .named("root");
+    .addField(Types.optional(INT32).as(INT_8).named("a"))
+    .addField(Types.optionalGroup()
+      .addField(Types.optional(INT32).as(INT_16).named("c"))
+      .addField(Types.optional(BINARY).as(UTF8).named("d"))
+      .named("b"))
+    .addField(Types.optionalList().
+      setElementType(Types.optional(INT32).as(DATE).named("element"))
+      .named("e"))
+    .addField(Types.optionalList().
+      setElementType(Types.optional(INT32).as(DATE).named("element"))
+      .named("f"))
+    .addField(Types.optional(FLOAT).named("g"))
+    .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("h"))
+    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("i"))
+    .named("root");
 
   private final Schema allTypesArrowSchema = new Schema(asList(
-      field("a", false, new ArrowType.Null()),
-      field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Null())),
-      field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
-      field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da",
new ArrowType.Null())),
-      field("e", new ArrowType.Int(8, true)),
-      field("e1", new ArrowType.Int(16, true)),
-      field("e2", new ArrowType.Int(32, true)),
-      field("e3", new ArrowType.Int(64, true)),
-      field("e4", new ArrowType.Int(8, false)),
-      field("e5", new ArrowType.Int(16, false)),
-      field("e6", new ArrowType.Int(32, false)),
-      field("e7", new ArrowType.Int(64, false)),
-      field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
-      field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
-      field("g", new ArrowType.Utf8()),
-      field("h", new ArrowType.Binary()),
-      field("i", new ArrowType.Bool()),
-      field("j", new ArrowType.Decimal(5, 5)),
-      field("j1", new ArrowType.Decimal(15, 5)),
-      field("j2", new ArrowType.Decimal(25, 5)),
-      field("k", new ArrowType.Date()),
-      field("l", new ArrowType.Time()),
-      field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
-      field("n", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
-      field("n1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
-      ));
+    field("a", false, new ArrowType.Null()),
+    field("b", new ArrowType.Struct(), field("ba", new ArrowType.Null())),
+    field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
+    field("d", new ArrowType.FixedSizeList(1), field("da", new ArrowType.Null())),
+    field("e", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("ea", new
ArrowType.Null())),
+    field("f", new ArrowType.Int(8, true)),
+    field("f1", new ArrowType.Int(16, true)),
+    field("f2", new ArrowType.Int(32, true)),
+    field("f3", new ArrowType.Int(64, true)),
+    field("f4", new ArrowType.Int(8, false)),
+    field("f5", new ArrowType.Int(16, false)),
+    field("f6", new ArrowType.Int(32, false)),
+    field("f7", new ArrowType.Int(64, false)),
+    field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+    field("g1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+    field("h", new ArrowType.Utf8()),
+    field("i", new ArrowType.Binary()),
+    field("j", new ArrowType.Bool()),
+    field("k", new ArrowType.Decimal(5, 5)),
+    field("k1", new ArrowType.Decimal(15, 5)),
+    field("k2", new ArrowType.Decimal(25, 5)),
+    field("l", new ArrowType.Date(DateUnit.DAY)),
+    field("m", new ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.SECOND, 32)),
+    field("n", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
"UTC")),
+    field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
+    field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
+  ));
   private final MessageType allTypesParquetSchema = Types.buildMessage()
-      .addField(Types.optional(BINARY).named("a"))
-      .addField(Types.optionalGroup()
-          .addField(Types.optional(BINARY).named("ba"))
-          .named("b"))
-      .addField(Types.optionalList().
-          setElementType(Types.optional(BINARY).named("element"))
-          .named("c"))
-      .addField(Types.optionalGroup()
-          .addField(Types.optional(BINARY).named("da"))
-          .named("d"))
-      .addField(Types.optional(INT32).as(INT_8).named("e"))
-      .addField(Types.optional(INT32).as(INT_16).named("e1"))
-      .addField(Types.optional(INT32).as(INT_32).named("e2"))
-      .addField(Types.optional(INT64).as(INT_64).named("e3"))
-      .addField(Types.optional(INT32).as(UINT_8).named("e4"))
-      .addField(Types.optional(INT32).as(UINT_16).named("e5"))
-      .addField(Types.optional(INT32).as(UINT_32).named("e6"))
-      .addField(Types.optional(INT64).as(UINT_64).named("e7"))
-      .addField(Types.optional(FLOAT).named("f"))
-      .addField(Types.optional(DOUBLE).named("f1"))
-      .addField(Types.optional(BINARY).as(UTF8).named("g"))
-      .addField(Types.optional(BINARY).named("h"))
-      .addField(Types.optional(BOOLEAN).named("i"))
-      .addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-      .addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-      .addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
-      .addField(Types.optional(INT32).as(DATE).named("k"))
-      .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
-      .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
-      .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n"))
-      .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n1"))
-      .named("root");
+    .addField(Types.optional(BINARY).named("a"))
+    .addField(Types.optionalGroup()
+      .addField(Types.optional(BINARY).named("ba"))
+      .named("b"))
+    .addField(Types.optionalList().
+      setElementType(Types.optional(BINARY).named("element"))
+      .named("c"))
+    .addField(Types.optionalList().
+      setElementType(Types.optional(BINARY).named("element"))
+      .named("d"))
+    .addField(Types.optionalGroup()
+      .addField(Types.optional(BINARY).named("ea"))
+      .named("e"))
+    .addField(Types.optional(INT32).as(INT_8).named("f"))
+    .addField(Types.optional(INT32).as(INT_16).named("f1"))
+    .addField(Types.optional(INT32).as(INT_32).named("f2"))
+    .addField(Types.optional(INT64).as(INT_64).named("f3"))
+    .addField(Types.optional(INT32).as(UINT_8).named("f4"))
+    .addField(Types.optional(INT32).as(UINT_16).named("f5"))
+    .addField(Types.optional(INT32).as(UINT_32).named("f6"))
+    .addField(Types.optional(INT64).as(UINT_64).named("f7"))
+    .addField(Types.optional(FLOAT).named("g"))
+    .addField(Types.optional(DOUBLE).named("g1"))
+    .addField(Types.optional(BINARY).as(UTF8).named("h"))
+    .addField(Types.optional(BINARY).named("i"))
+    .addField(Types.optional(BOOLEAN).named("j"))
+    .addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("k"))
+    .addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("k1"))
+    .addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("k2"))
+    .addField(Types.optional(INT32).as(DATE).named("l"))
+    .addField(Types.optional(INT32).as(TIME_MILLIS).named("m"))
+    .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("n"))
+    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o"))
+    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o1"))
+    .named("root");
 
   private final Schema supportedTypesArrowSchema = new Schema(asList(
-      field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Binary())),
-      field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
-      field("e", new ArrowType.Int(8, true)),
-      field("e1", new ArrowType.Int(16, true)),
-      field("e2", new ArrowType.Int(32, true)),
-      field("e3", new ArrowType.Int(64, true)),
-      field("e4", new ArrowType.Int(8, false)),
-      field("e5", new ArrowType.Int(16, false)),
-      field("e6", new ArrowType.Int(32, false)),
-      field("e7", new ArrowType.Int(64, false)),
-      field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
-      field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
-      field("g", new ArrowType.Utf8()),
-      field("h", new ArrowType.Binary()),
-      field("i", new ArrowType.Bool()),
-      field("j", new ArrowType.Decimal(5, 5)),
-      field("j1", new ArrowType.Decimal(15, 5)),
-      field("j2", new ArrowType.Decimal(25, 5)),
-      field("k", new ArrowType.Date()),
-      field("l", new ArrowType.Time()),
-      field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND))
-      ));
+    field("b", new ArrowType.Struct(), field("ba", new ArrowType.Binary())),
+    field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
+    field("e", new ArrowType.Int(8, true)),
+    field("e1", new ArrowType.Int(16, true)),
+    field("e2", new ArrowType.Int(32, true)),
+    field("e3", new ArrowType.Int(64, true)),
+    field("e4", new ArrowType.Int(8, false)),
+    field("e5", new ArrowType.Int(16, false)),
+    field("e6", new ArrowType.Int(32, false)),
+    field("e7", new ArrowType.Int(64, false)),
+    field("f", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+    field("f1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+    field("g", new ArrowType.Utf8()),
+    field("h", new ArrowType.Binary()),
+    field("i", new ArrowType.Bool()),
+    field("j", new ArrowType.Decimal(5, 5)),
+    field("j1", new ArrowType.Decimal(15, 5)),
+    field("j2", new ArrowType.Decimal(25, 5)),
+    field("k", new ArrowType.Date(DateUnit.DAY)),
+    field("l", new ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32)),
+    field("m", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND,
"UTC"))
+  ));
 
   private final MessageType supportedTypesParquetSchema = Types.buildMessage()
-      .addField(Types.optionalGroup()
-          .addField(Types.optional(BINARY).named("ba"))
-          .named("b"))
-      .addField(Types.optionalList().
-          setElementType(Types.optional(BINARY).named("element"))
-          .named("c"))
-      .addField(Types.optional(INT32).as(INT_8).named("e"))
-      .addField(Types.optional(INT32).as(INT_16).named("e1"))
-      .addField(Types.optional(INT32).as(INT_32).named("e2"))
-      .addField(Types.optional(INT64).as(INT_64).named("e3"))
-      .addField(Types.optional(INT32).as(UINT_8).named("e4"))
-      .addField(Types.optional(INT32).as(UINT_16).named("e5"))
-      .addField(Types.optional(INT32).as(UINT_32).named("e6"))
-      .addField(Types.optional(INT64).as(UINT_64).named("e7"))
-      .addField(Types.optional(FLOAT).named("f"))
-      .addField(Types.optional(DOUBLE).named("f1"))
-      .addField(Types.optional(BINARY).as(UTF8).named("g"))
-      .addField(Types.optional(BINARY).named("h"))
-      .addField(Types.optional(BOOLEAN).named("i"))
-      .addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-      .addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-      .addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
-      .addField(Types.optional(INT32).as(DATE).named("k"))
-      .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
-      .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
-      .named("root");
+    .addField(Types.optionalGroup()
+      .addField(Types.optional(BINARY).named("ba"))
+      .named("b"))
+    .addField(Types.optionalList().
+      setElementType(Types.optional(BINARY).named("element"))
+      .named("c"))
+    .addField(Types.optional(INT32).as(INT_8).named("e"))
+    .addField(Types.optional(INT32).as(INT_16).named("e1"))
+    .addField(Types.optional(INT32).as(INT_32).named("e2"))
+    .addField(Types.optional(INT64).as(INT_64).named("e3"))
+    .addField(Types.optional(INT32).as(UINT_8).named("e4"))
+    .addField(Types.optional(INT32).as(UINT_16).named("e5"))
+    .addField(Types.optional(INT32).as(UINT_32).named("e6"))
+    .addField(Types.optional(INT64).as(UINT_64).named("e7"))
+    .addField(Types.optional(FLOAT).named("f"))
+    .addField(Types.optional(DOUBLE).named("f1"))
+    .addField(Types.optional(BINARY).as(UTF8).named("g"))
+    .addField(Types.optional(BINARY).named("h"))
+    .addField(Types.optional(BOOLEAN).named("i"))
+    .addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
+    .addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
+    .addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
+    .addField(Types.optional(INT32).as(DATE).named("k"))
+    .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
+    .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
+    .named("root");
 
   private final Schema paperArrowSchema = new Schema(asList(
-      field("DocId", false, new ArrowType.Int(64, true)),
-      field("Links", new ArrowType.Struct_(),
-          field("Backward", false, new ArrowType.List(), field(null, false, new ArrowType.Int(64,
true))),
-          field("Forward", false, new ArrowType.List(), field(null, false, new ArrowType.Int(64,
true)))
-      ),
-      field("Name", false, new ArrowType.List(),
-          field(null, false, new ArrowType.Struct_(),
-              field("Language", false, new ArrowType.List(),
-                  field(null, false, new ArrowType.Struct_(),
-                      field("Code", false, new ArrowType.Binary()),
-                      field("Country", new ArrowType.Binary())
-                  )
-               ),
-               field("Url", new ArrowType.Binary())
+    field("DocId", false, new ArrowType.Int(64, true)),
+    field("Links", new ArrowType.Struct(),
+      field("Backward", false, new ArrowType.List(), field(null, false, new ArrowType.Int(64,
true))),
+      field("Forward", false, new ArrowType.List(), field(null, false, new ArrowType.Int(64,
true)))
+    ),
+    field("Name", false, new ArrowType.List(),
+      field(null, false, new ArrowType.Struct(),
+        field("Language", false, new ArrowType.List(),
+          field(null, false, new ArrowType.Struct(),
+            field("Code", false, new ArrowType.Binary()),
+            field("Country", new ArrowType.Binary())
           )
+        ),
+        field("Url", new ArrowType.Binary())
       )
+    )
   ));
 
   private SchemaConverter converter = new SchemaConverter();
@@ -286,7 +294,7 @@ public class TestSchemaConverter {
   @Test
   public void testAllMap() throws IOException {
     SchemaMapping map = converter.map(allTypesArrowSchema, allTypesParquetSchema);
-    Assert.assertEquals("p, s<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
+    Assert.assertEquals("p, s<p>, l<p>, l<p>, u<p>, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
   }
 
   private String toSummaryString(SchemaMapping map) {
@@ -301,32 +309,32 @@ public class TestSchemaConverter {
         sb.append(", ");
       }
       sb.append(
-          typeMapping.accept(new TypeMappingVisitor<String>() {
-            @Override
-            public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
-              return "p";
-            }
+        typeMapping.accept(new TypeMappingVisitor<String>() {
+          @Override
+          public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
+            return "p";
+          }
 
-            @Override
-            public String visit(StructTypeMapping structTypeMapping) {
-              return "s";
-            }
+          @Override
+          public String visit(StructTypeMapping structTypeMapping) {
+            return "s";
+          }
 
-            @Override
-            public String visit(UnionTypeMapping unionTypeMapping) {
-              return "u";
-            }
+          @Override
+          public String visit(UnionTypeMapping unionTypeMapping) {
+            return "u";
+          }
 
-            @Override
-            public String visit(ListTypeMapping listTypeMapping) {
-              return "l";
-            }
+          @Override
+          public String visit(ListTypeMapping listTypeMapping) {
+            return "l";
+          }
 
-            @Override
-            public String visit(RepeatedTypeMapping repeatedTypeMapping) {
-              return "r";
-            }
-          })
+          @Override
+          public String visit(RepeatedTypeMapping repeatedTypeMapping) {
+            return "r";
+          }
+        })
       );
       if (typeMapping.getChildren() != null && !typeMapping.getChildren().isEmpty())
{
         sb.append("<").append(toSummaryString(typeMapping.getChildren())).append(">");

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.

Mime
View raw message