parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexleven...@apache.org
Subject parquet-mr git commit: PARQUET-362 - Fix parquet buffered writer being oversensitive to union schema changes
Date Thu, 20 Aug 2015 20:53:03 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 2f956f465 -> 3f36b7b50


PARQUET-362 - Fix parquet buffered writer being oversensitive to union schema changes

Parquet does prevent records with unknown union fields to be written as it would
create a TProtocol violation. But it also prevents records with unions having one their field
itself having an unknown field (which is acceptable if it is a struct).

Author: Laurent Goujon <lgoujon@twitter.com>

Closes #262 from laurentgo/fix-parquet-union-write-bug and squashes the following commits:

d15ee74 [Laurent Goujon] Fix parquet buffered writer being oversentive to union changes


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/3f36b7b5
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/3f36b7b5
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/3f36b7b5

Branch: refs/heads/master
Commit: 3f36b7b50bdda3eeca632ad5440bb82b8e34cb40
Parents: 2f956f4
Author: Laurent Goujon <lgoujon@twitter.com>
Authored: Thu Aug 20 13:52:56 2015 -0700
Committer: Alex Levenson <alexlevenson@twitter.com>
Committed: Thu Aug 20 13:52:56 2015 -0700

----------------------------------------------------------------------
 .../thrift/BufferedProtocolReadToWrite.java     |  9 +++--
 .../parquet/thrift/TestProtocolReadToWrite.java | 37 ++++++++++++++++++++
 parquet-thrift/src/test/thrift/compat.thrift    |  8 +++++
 3 files changed, 49 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/3f36b7b5/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
b/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
index 70bd003..9fce1c3 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
@@ -375,6 +375,9 @@ public class BufferedProtocolReadToWrite implements ProtocolPipe {
         hasFieldsIgnored |= true;
         continue;
       }
+      
+      childFieldsPresent++;
+
       buffer.add(new Action() {
         @Override
         public void write(TProtocol out) throws TException {
@@ -386,11 +389,7 @@ public class BufferedProtocolReadToWrite implements ProtocolPipe {
           return "f=" + currentField.id + "<t=" + typeName(currentField.type) + ">:
";
         }
       });
-      boolean wasIgnored = readOneValue(in, field.type, buffer, expectedField.getType());
-      if (!wasIgnored) {
-        childFieldsPresent++;
-      }
-      hasFieldsIgnored |= wasIgnored;
+      hasFieldsIgnored |= readOneValue(in, field.type, buffer, expectedField.getType());
       in.readFieldEnd();
       buffer.add(FIELD_END);
     }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/3f36b7b5/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestProtocolReadToWrite.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestProtocolReadToWrite.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestProtocolReadToWrite.java
index ba27166..bdd3a3f 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestProtocolReadToWrite.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestProtocolReadToWrite.java
@@ -220,6 +220,43 @@ public class TestProtocolReadToWrite {
     assertEquals(0, countingHandler.fieldIgnoredCount);
   }
 
+  @Test
+  public void testUnionWithStructWithUnknownField() throws Exception {
+    CountingErrorHandler countingHandler = new CountingErrorHandler();
+    BufferedProtocolReadToWrite p = new BufferedProtocolReadToWrite(ThriftSchemaConverter.toStructType(UnionV3.class),
countingHandler);
+    ByteArrayOutputStream in = new ByteArrayOutputStream();
+    final ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+    UnionV3 validUnion = UnionV3.aStruct(new StructV1("a valid struct"));
+    StructV2 structV2 = new StructV2("a valid struct");
+    structV2.setAge("a valid age");
+    UnionThatLooksLikeUnionV3 unionWithUnknownStructField = UnionThatLooksLikeUnionV3.aStruct(structV2);
+
+    validUnion.write(protocol(in));
+    unionWithUnknownStructField.write(protocol(in));
+
+    ByteArrayInputStream baos = new ByteArrayInputStream(in.toByteArray());
+
+    // both should not throw
+    p.readOne(protocol(baos), protocol(out));
+    p.readOne(protocol(baos), protocol(out));
+
+    assertEquals(1, countingHandler.recordCountOfMissingFields);
+    assertEquals(1, countingHandler.fieldIgnoredCount);
+
+    in = new ByteArrayOutputStream();
+    validUnion.write(protocol(in));
+    unionWithUnknownStructField.write(protocol(in));
+
+    baos = new ByteArrayInputStream(in.toByteArray());
+
+    // both should not throw
+    p.readOne(protocol(baos), protocol(out));
+    p.readOne(protocol(baos), protocol(out));
+
+    assertEquals(2, countingHandler.recordCountOfMissingFields);
+    assertEquals(2, countingHandler.fieldIgnoredCount);
+  }
 
   /**
    * When enum value in data has an undefined index, it's considered as corrupted record
and will be skipped.

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/3f36b7b5/parquet-thrift/src/test/thrift/compat.thrift
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/thrift/compat.thrift b/parquet-thrift/src/test/thrift/compat.thrift
index 8c90e23..3316576 100644
--- a/parquet-thrift/src/test/thrift/compat.thrift
+++ b/parquet-thrift/src/test/thrift/compat.thrift
@@ -152,6 +152,10 @@ union UnionV2 {
   3: ABool aNewBool
 }
 
+union UnionV3 {
+  1: StructV1 aStruct
+}
+
 struct StructWithUnionV1 {  
   1: required string name,
   2: required UnionV1 aUnion
@@ -173,6 +177,10 @@ struct StructWithAStructThatLooksLikeUnionV2 {
   2: required AStructThatLooksLikeUnionV2 aNotQuiteUnion
 }
 
+union UnionThatLooksLikeUnionV3 {
+  1: StructV2 aStruct
+}
+
 union UnionOfStructs {
   1: StructV3 structV3,
   2: StructV4WithExtracStructField structV4,


Mime
View raw message