parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [parquet-mr] branch master updated: PARQUET-138: Allow merging more restrictive field in less restrictive field (#550)
Date Fri, 01 Feb 2019 18:49:37 GMT
This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 51c4cc3  PARQUET-138: Allow merging more restrictive field in less restrictive field
(#550)
51c4cc3 is described below

commit 51c4cc30f5df1f070a211cfed652aefdc096de69
Author: Nicolas Trinquier <ntrinquier@users.noreply.github.com>
AuthorDate: Fri Feb 1 18:49:32 2019 +0000

    PARQUET-138: Allow merging more restrictive field in less restrictive field (#550)
    
    * Allow merging more restrictive field in less restrictive field
    * Make class and function names more explicit
---
 .../java/org/apache/parquet/schema/GroupType.java  |  3 --
 .../org/apache/parquet/schema/PrimitiveType.java   |  3 +-
 .../main/java/org/apache/parquet/schema/Type.java  | 23 ++++++++++++++
 .../org/apache/parquet/schema/TestMessageType.java | 17 +++++-----
 .../apache/parquet/schema/TestRepetitionType.java  | 36 ++++++++++++++++++++++
 5 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
index 64e7062..52184e1 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
@@ -400,9 +400,6 @@ public class GroupType extends Type {
       Type merged;
       if (toMerge.containsField(type.getName())) {
         Type fieldToMerge = toMerge.getType(type.getName());
-        if (fieldToMerge.getRepetition().isMoreRestrictiveThan(type.getRepetition())) {
-          throw new IncompatibleSchemaModificationException("repetition constraint is more
restrictive: can not merge type " + fieldToMerge + " into " + type);
-        }
         if (type.getLogicalTypeAnnotation() != null && !type.getLogicalTypeAnnotation().equals(fieldToMerge.getLogicalTypeAnnotation()))
{
           throw new IncompatibleSchemaModificationException("cannot merge logical type "
+ fieldToMerge.getLogicalTypeAnnotation() + " into " + type.getLogicalTypeAnnotation());
         }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 9ab53b2..b59fdec 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -750,7 +750,8 @@ public final class PrimitiveType extends Type {
       }
     }
 
-    Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(primitive, toMerge.getRepetition());
+    Repetition repetition = Repetition.leastRestrictive(this.getRepetition(), toMerge.getRepetition());
+    Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(primitive, repetition);
 
     if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitive) {
       builder.length(length);
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
index d046957..dd13ec1 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -20,6 +20,7 @@ package org.apache.parquet.schema;
 
 import static org.apache.parquet.Preconditions.checkNotNull;
 
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.parquet.io.InvalidRecordException;
@@ -111,6 +112,28 @@ abstract public class Type {
      */
     abstract public boolean isMoreRestrictiveThan(Repetition other);
 
+
+    /**
+     * @param repetitions repetitions to traverse
+     * @return the least restrictive repetition of all repetitions provided
+     */
+    public static Repetition leastRestrictive(Repetition... repetitions) {
+      boolean hasOptional = false;
+
+      for (Repetition repetition : repetitions) {
+        if (repetition == REPEATED) {
+          return REPEATED;
+        } else if (repetition == OPTIONAL) {
+          hasOptional = true;
+        }
+      }
+
+      if (hasOptional) {
+        return OPTIONAL;
+      }
+
+      return REQUIRED;
+    }
   }
 
   private final String name;
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
index e511d42..ee7663c 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
@@ -20,9 +20,6 @@ package org.apache.parquet.schema;
 
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
 import static org.apache.parquet.schema.OriginalType.LIST;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
@@ -30,6 +27,9 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import org.junit.Test;
 
@@ -89,12 +89,11 @@ public class TestMessageType {
     MessageType t4 = new MessageType("root2",
         new PrimitiveType(REQUIRED, BINARY, "a"));
 
-    try {
-      t3.union(t4);
-      fail("moving from optional to required");
-    } catch (IncompatibleSchemaModificationException e) {
-      assertEquals("repetition constraint is more restrictive: can not merge type required
binary a into optional binary a", e.getMessage());
-    }
+    assertEquals(
+        t3.union(t4),
+        new MessageType("root1",
+            new PrimitiveType(OPTIONAL, BINARY, "a"))
+        );
 
     assertEquals(
         t4.union(t3),
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java
b/parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java
new file mode 100644
index 0000000..524b03c
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestRepetitionType {
+  @Test
+  public void testLeastRestrictiveRepetition() {
+    Type.Repetition REQUIRED = Type.Repetition.REQUIRED;
+    Type.Repetition OPTIONAL = Type.Repetition.OPTIONAL;
+    Type.Repetition REPEATED = Type.Repetition.REPEATED;
+
+    assertEquals(REPEATED, Type.Repetition.leastRestrictive(REQUIRED, OPTIONAL, REPEATED,
REQUIRED, OPTIONAL, REPEATED));
+    assertEquals(OPTIONAL, Type.Repetition.leastRestrictive(REQUIRED, OPTIONAL, REQUIRED,
OPTIONAL));
+    assertEquals(REQUIRED, Type.Repetition.leastRestrictive(REQUIRED, REQUIRED));
+  }
+}


Mime
View raw message