parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject [parquet-mr] branch master updated: PARQUET-952: Avro union with single type fails with 'is not a group' (#459)
Date Mon, 18 Jun 2018 07:47:28 GMT
This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new cc8bdf1  PARQUET-952: Avro union with single type fails with 'is not a group' (#459)
cc8bdf1 is described below

commit cc8bdf1d13639d12d02170d40cc4890180bbabc5
Author: nandorKollar <nandorKollar@users.noreply.github.com>
AuthorDate: Mon Jun 18 09:47:25 2018 +0200

    PARQUET-952: Avro union with single type fails with 'is not a group' (#459)
---
 .../apache/parquet/avro/AvroSchemaConverter.java   | 22 ++++++++++-----
 .../org/apache/parquet/avro/TestReadWrite.java     | 31 ++++++++++++++++++++++
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 70b6525..1bb12b9 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -195,8 +195,11 @@ public class AvroSchemaConverter {
 
   private Type convertUnion(String fieldName, Schema schema, Type.Repetition repetition) {
     List<Schema> nonNullSchemas = new ArrayList<Schema>(schema.getTypes().size());
+    // Found any schemas in the union? Required for the edge case, where the union contains only a single type.
+    boolean foundNullSchema = false;
     for (Schema childSchema : schema.getTypes()) {
       if (childSchema.getType().equals(Schema.Type.NULL)) {
+        foundNullSchema = true;
         if (Type.Repetition.REQUIRED == repetition) {
           repetition = Type.Repetition.OPTIONAL;
         }
@@ -211,16 +214,21 @@ public class AvroSchemaConverter {
         throw new UnsupportedOperationException("Cannot convert Avro union of only nulls");
 
       case 1:
-        return convertField(fieldName, nonNullSchemas.get(0), repetition);
+        return foundNullSchema ? convertField(fieldName, nonNullSchemas.get(0), repetition) :
+          convertUnionToGroupType(fieldName, repetition, nonNullSchemas);
 
       default: // complex union type
-        List<Type> unionTypes = new ArrayList<Type>(nonNullSchemas.size());
-        int index = 0;
-        for (Schema childSchema : nonNullSchemas) {
-          unionTypes.add( convertField("member" + index++, childSchema, Type.Repetition.OPTIONAL));
-        }
-        return new GroupType(repetition, fieldName, unionTypes);
+        return convertUnionToGroupType(fieldName, repetition, nonNullSchemas);
+    }
+  }
+
+  private Type convertUnionToGroupType(String fieldName, Type.Repetition repetition, List<Schema> nonNullSchemas) {
+    List<Type> unionTypes = new ArrayList<Type>(nonNullSchemas.size());
+    int index = 0;
+    for (Schema childSchema : nonNullSchemas) {
+      unionTypes.add( convertField("member" + index++, childSchema, Type.Repetition.OPTIONAL));
     }
+    return new GroupType(repetition, fieldName, unionTypes);
   }
 
   private Type convertField(Schema.Field field) {
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
index 4fa71ea..2335e36 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
@@ -647,6 +647,37 @@ public class TestReadWrite {
 
   }
 
+  @Test
+  public void testUnionWithSingleNonNullType() throws Exception {
+    Schema avroSchema = Schema.createRecord("SingleStringUnionRecord", null, null, false);
+    avroSchema.setFields(
+      Collections.singletonList(new Schema.Field("value",
+        Schema.createUnion(Schema.create(Schema.Type.STRING)), null, null)));
+
+    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
+    tmp.deleteOnExit();
+    tmp.delete();
+    Path file = new Path(tmp.getPath());
+
+    // Parquet writer
+    ParquetWriter parquetWriter = AvroParquetWriter.builder(file).withSchema(avroSchema)
+      .withConf(new Configuration())
+      .build();
+
+    GenericRecord record = new GenericRecordBuilder(avroSchema)
+      .set("value", "theValue")
+      .build();
+
+    parquetWriter.write(record);
+    parquetWriter.close();
+
+    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file);
+    GenericRecord nextRecord = reader.read();
+
+    assertNotNull(nextRecord);
+    assertEquals(str("theValue"), nextRecord.get("value"));
+  }
+
   /**
    * Return a String or Utf8 depending on whether compatibility is on
    */

-- 
To stop receiving notification emails like this one, please contact
gabor@apache.org.

Mime
View raw message