parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexleven...@apache.org
Subject [1/2] parquet-mr git commit: PARQUET-287: Keep a least 1 column from union members when projecting thrift unions
Date Wed, 20 May 2015 02:36:07 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 181affd5c -> ded56ffd5


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java
new file mode 100644
index 0000000..611a1a9
--- /dev/null
+++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java
@@ -0,0 +1,480 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.thrift;
+
+import org.apache.parquet.thrift.test.compat.ListOfUnions;
+import org.apache.parquet.thrift.test.compat.MapWithUnionKey;
+import org.apache.parquet.thrift.test.compat.MapWithUnionValue;
+import org.apache.parquet.thrift.test.compat.NestedNestedUnion;
+import org.apache.parquet.thrift.test.compat.NestedUnion;
+import org.apache.parquet.thrift.test.compat.OptionalInsideRequired;
+import org.apache.parquet.thrift.test.compat.RequiredInsideOptional;
+import org.apache.parquet.thrift.test.compat.StructWithNestedUnion;
+import org.apache.parquet.thrift.test.compat.StructWithOptionalUnionOfStructs;
+import org.apache.parquet.thrift.test.compat.UnionOfStructs;
+import org.apache.parquet.thrift.test.compat.UnionV2;
+import org.junit.Test;
+
+import static org.apache.parquet.thrift.TestThriftSchemaConverter.shouldGetProjectedSchema;
+
+public class TestThriftSchemaConverterProjectUnion {
+
+  /**
+   * Test projecting into a top level union
+   */
+  @Test
+  public void testTopLevelUnions() {
+
+    // very simple case, a union of structs where each struct has only 1 field
+    shouldGetProjectedSchema("aLong/**", "aLong",
+        "message ParquetSchema {\n" +
+        "  optional group aString = 1 {\n" +
+        "    required binary s (UTF8) = 1;\n" +
+        "  }\n" +
+        "  optional group aLong = 2 {\n" +
+        "    required int64 l = 1;\n" +
+        "  }\n" +
+        "  optional group aNewBool = 3 {\n" +
+        "    required boolean b = 1;\n" +
+        "  }\n" +
+        "}", UnionV2.class);
+
+    // a union of structs, where each struct has more than one field
+    // we should still only get one field per child here though
+    shouldGetProjectedSchema("aNewBool/**", "aNewBool",
+        "message ParquetSchema {\n" +
+        "  optional group structV3 = 1 {\n" +
+        "    required binary name (UTF8) = 1;\n" +
+        "  }\n" +
+        "  optional group structV4 = 2 {\n" +
+        "    required binary name (UTF8) = 1;\n" +
+        "  }\n" +
+        "  optional group aNewBool = 3 {\n" +
+        "    required boolean b = 1;\n" +
+        "  }\n" +
+        "}", UnionOfStructs.class);
+  }
+
+  /**
+   * An optional union should be dropped if not selected.
+   */
+  @Test
+  public void optionalUnionShouldBeDropped() {
+    shouldGetProjectedSchema("name", "name",
+        "message ParquetSchema {\n" +
+        "  required binary name (UTF8) = 1;\n" +
+        "}", StructWithOptionalUnionOfStructs.class);
+  }
+
+  /**
+   * An optional union inside a required struct should still be
+   * dropped if not selected.
+   */
+  @Test
+  public void optionalUnionInRequiredStructShouldBeDropped() {
+    shouldGetProjectedSchema("name", "name",
+        "message ParquetSchema {\n" +
+        "  required binary name (UTF8) = 1;\n" +
+        "}", OptionalInsideRequired.class);
+  }
+
+  /**
+   * A required union inside of an un-selected optional struct
+   * should get dropped along with the optional struct.
+   */
+  @Test
+  public void requiredUnionInsideOptionalStructShouldBeDropped() {
+    // test a required union inside an optional struct
+    shouldGetProjectedSchema("name", "name",
+        "message ParquetSchema {\n" +
+        "  required binary name (UTF8) = 1;\n" +
+        "}", RequiredInsideOptional.class);
+  }
+
+  /**
+   * A required union inside a selected optional struct should keep
+   * one sentinel column for each "kind" of union member.
+   */
+  @Test
+  public void requiredUnionInsideOptionalStructShouldBeKeptIfParentSelected() {
+    shouldGetProjectedSchema("aStruct/name", "aStruct.name",
+        "message ParquetSchema {\n" +
+        "  optional group aStruct = 2 {\n" +
+        "    required binary name (UTF8) = 1;\n" +
+        "    required group aUnion = 2 {\n" +
+        "      optional group structV3 = 1 {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      }\n" +
+        "      optional group structV4 = 2 {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      }\n" +
+        "      optional group aNewBool = 3 {\n" +
+        "        required boolean b = 1;\n" +
+        "      }\n" +
+        "    }\n" +
+        "  }\n" +
+        "}", RequiredInsideOptional.class);
+  }
+
+  /**
+   * Selecting only one "kind" of a union should trigger keeping one sentinel
+   * column from the rest of the "kinds" of the union.
+   */
+  @Test
+  public void selectingOneUnionMemberKeepsSentinels() {
+    shouldGetProjectedSchema(
+        "aUnion/structV4/addedStruct/gender",
+        "aUnion.structV4.addedStruct.gender",
+        "message ParquetSchema {\n" +
+        "  optional group aUnion = 2 {\n" +
+        "    optional group structV3 = 1 {\n" +
+        "      required binary name (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group structV4 = 2 {\n" +
+        "      optional group addedStruct = 4 {\n" +
+        "        optional binary gender (UTF8) = 3;\n" +
+        "      }\n" +
+        "    }\n" +
+        "    optional group aNewBool = 3 {\n" +
+        "      required boolean b = 1;\n" +
+        "    }\n" +
+        "  }\n" +
+        "}",
+        StructWithOptionalUnionOfStructs.class);
+  }
+
+  /**
+   * In the case of a union inside a union (and union inside union inside union),
+   * even though the fields will be "optional" because of how unions
+   * are stored, we still need to grab sentinel columns from
+   * the members of the union.
+   */
+  @Test
+  public void testUnionInsideUnion() {
+
+    shouldGetProjectedSchema("structV3/age", "structV3.age",
+        "message ParquetSchema {\n" +
+        "  optional group structV3 = 1 {\n" +
+        "    optional binary age (UTF8) = 2;\n" +
+        "  }\n" +
+        "  optional group unionOfStructs = 2 {\n" +
+        "    optional group structV3 = 1 {\n" +
+        "      required binary name (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group structV4 = 2 {\n" +
+        "      required binary name (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group aNewBool = 3 {\n" +
+        "      required boolean b = 1;\n" +
+        "    }\n" +
+        "  }\n" +
+        "  optional group aLong = 3 {\n" +
+        "    required int64 l = 1;\n" +
+        "  }\n" +
+        "}", NestedUnion.class);
+
+    shouldGetProjectedSchema(
+        "unionOfStructs/structV4/addedStruct/gender",
+        "unionOfStructs.structV4.addedStruct.gender",
+        "message ParquetSchema {\n" +
+        "  optional group structV3 = 1 {\n" +
+        "    required binary name (UTF8) = 1;\n" +
+        "  }\n" +
+        "  optional group unionOfStructs = 2 {\n" +
+        "    optional group structV3 = 1 {\n" +
+        "      required binary name (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group structV4 = 2 {\n" +
+        "      optional group addedStruct = 4 {\n" +
+        "        optional binary gender (UTF8) = 3;\n" +
+        "      }\n" +
+        "    }\n" +
+        "    optional group aNewBool = 3 {\n" +
+        "      required boolean b = 1;\n" +
+        "    }\n" +
+        "  }\n" +
+        "  optional group aLong = 3 {\n" +
+        "    required int64 l = 1;\n" +
+        "  }\n" +
+        "}\n", NestedUnion.class);
+
+    shouldGetProjectedSchema(
+        "unionV2/aLong/**",
+        "unionV2.aLong",
+        "message ParquetSchema {\n" +
+        "  optional group nestedUnion = 1 {\n" +
+        "    optional group structV3 = 1 {\n" +
+        "      required binary name (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group unionOfStructs = 2 {\n" +
+        "      optional group structV3 = 1 {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      }\n" +
+        "      optional group structV4 = 2 {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      }\n" +
+        "      optional group aNewBool = 3 {\n" +
+        "        required boolean b = 1;\n" +
+        "      }\n" +
+        "    }\n" +
+        "    optional group aLong = 3 {\n" +
+        "      required int64 l = 1;\n" +
+        "    }\n" +
+        "  }\n" +
+        "  optional group unionV2 = 2 {\n" +
+        "    optional group aString = 1 {\n" +
+        "      required binary s (UTF8) = 1;\n" +
+        "    }\n" +
+        "    optional group aLong = 2 {\n" +
+        "      required int64 l = 1;\n" +
+        "    }\n" +
+        "    optional group aNewBool = 3 {\n" +
+        "      required boolean b = 1;\n" +
+        "    }\n" +
+        "  }\n" +
+        "}", NestedNestedUnion.class);
+
+  }
+
+  @Test
+  public void testListOfUnions() {
+
+    // selecting a field from an optional list of unions should also choose a sentinel field
+    // for the rest of the union members
+    // at the same time, this should also drop the required list of unions, because it is
safe to
+    // drop even a required but repeated group
+    shouldGetProjectedSchema(
+      "optListUnion/structV3/age",
+      "optListUnion.structV3.age",
+      "message ParquetSchema {\n" +
+      "  optional group optListUnion (LIST) = 1 {\n" +
+      "    repeated group optListUnion_tuple {\n" +
+      "      optional group structV3 = 1 {\n" +
+      "        optional binary age (UTF8) = 2;\n" +
+      "      }\n" +
+      "      optional group structV4 = 2 {\n" +
+      "        required binary name (UTF8) = 1;\n" +
+      "      }\n" +
+      "      optional group aNewBool = 3 {\n" +
+      "        required boolean b = 1;\n" +
+      "      }\n" +
+      "    }\n" +
+      "  }\n" +
+      "}", ListOfUnions.class);
+
+    // same goes for selecting a field from a required list of unions
+    // and at the same time, the optional list of unions should be dropped too
+    shouldGetProjectedSchema(
+        "reqListUnion/structV3/age",
+        "reqListUnion.structV3.age",
+        "message ParquetSchema {\n" +
+        "  required group reqListUnion (LIST) = 2 {\n" +
+        "    repeated group reqListUnion_tuple {\n" +
+        "      optional group structV3 = 1 {\n" +
+        "        optional binary age (UTF8) = 2;\n" +
+        "      }\n" +
+        "      optional group structV4 = 2 {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      }\n" +
+        "      optional group aNewBool = 3 {\n" +
+        "        required boolean b = 1;\n" +
+        "      }\n" +
+        "    }\n" +
+        "  }\n" +
+        "}", ListOfUnions.class);
+
+  }
+
+  @Test
+  public void testMapWithUnionKey() {
+    shouldGetProjectedSchema(
+        "optMapWithUnionKey/key/**",
+        "optMapWithUnionKey.key",
+        "message ParquetSchema {\n" +
+        "  optional group optMapWithUnionKey (MAP) = 1 {\n" +
+        "    repeated group map (MAP_KEY_VALUE) {\n" +
+        "      required group key {\n" +
+        "        optional group structV3 = 1 {\n" +
+        "          required binary name (UTF8) = 1;\n" +
+        "          optional binary age (UTF8) = 2;\n" +
+        "          optional binary gender (UTF8) = 3;\n" +
+        "        }\n" +
+        "        optional group structV4 = 2 {\n" +
+        "          required binary name (UTF8) = 1;\n" +
+        "          optional binary age (UTF8) = 2;\n" +
+        "          optional binary gender (UTF8) = 3;\n" +
+        "          optional group addedStruct = 4 {\n" +
+        "            required binary name (UTF8) = 1;\n" +
+        "            optional binary age (UTF8) = 2;\n" +
+        "            optional binary gender (UTF8) = 3;\n" +
+        "          }\n" +
+        "        }\n" +
+        "        optional group aNewBool = 3 {\n" +
+        "          required boolean b = 1;\n" +
+        "        }\n" +
+        "      }\n" +
+        "      optional group value {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "      } " +
+        "    }\n" +
+        "  }\n" +
+        "}", MapWithUnionKey.class);
+
+    shouldGetProjectedSchema(
+        "optMapWithUnionKey/key/**;optMapWithUnionKey/value/gender",
+        "optMapWithUnionKey.{key,value.gender}",
+        "message ParquetSchema {\n" +
+        "  optional group optMapWithUnionKey (MAP) = 1 {\n" +
+        "    repeated group map (MAP_KEY_VALUE) {\n" +
+        "      required group key {\n" +
+        "        optional group structV3 = 1 {\n" +
+        "          required binary name (UTF8) = 1;\n" +
+        "          optional binary age (UTF8) = 2;\n" +
+        "          optional binary gender (UTF8) = 3;\n" +
+        "        }\n" +
+        "        optional group structV4 = 2 {\n" +
+        "          required binary name (UTF8) = 1;\n" +
+        "          optional binary age (UTF8) = 2;\n" +
+        "          optional binary gender (UTF8) = 3;\n" +
+        "          optional group addedStruct = 4 {\n" +
+        "            required binary name (UTF8) = 1;\n" +
+        "            optional binary age (UTF8) = 2;\n" +
+        "            optional binary gender (UTF8) = 3;\n" +
+        "          }\n" +
+        "        }\n" +
+        "        optional group aNewBool = 3 {\n" +
+        "          required boolean b = 1;\n" +
+        "        }\n" +
+        "      }\n" +
+        "      optional group value {\n" +
+        "        optional binary gender (UTF8) = 3;\n" +
+        "      }\n" +
+        "    }\n" +
+        "  }\n" +
+        "}", MapWithUnionKey.class);
+  }
+
+  @Test
+  public void testMapWithUnionValue() {
+    shouldGetProjectedSchema(
+        "optMapWithUnionValue/key/**;optMapWithUnionValue/value/structV4/addedStruct/gender",
+        "optMapWithUnionValue.{key,value.structV4.addedStruct.gender}",
+        "message ParquetSchema {\n" +
+        "  optional group optMapWithUnionValue (MAP) = 1 {\n" +
+        "    repeated group map (MAP_KEY_VALUE) {\n" +
+        "      required group key {\n" +
+        "        required binary name (UTF8) = 1;\n" +
+        "        optional binary age (UTF8) = 2;\n" +
+        "        optional binary gender (UTF8) = 3;\n" +
+        "      }\n" +
+        "      optional group value {\n" +
+        "        optional group structV3 = 1 {\n" +
+        "          required binary name (UTF8) = 1;\n" +
+        "        }\n" +
+        "        optional group structV4 = 2 {\n" +
+        "          optional group addedStruct = 4 {\n" +
+        "            optional binary gender (UTF8) = 3;\n" +
+        "          }\n" +
+        "        }\n" +
+        "        optional group aNewBool = 3 {\n" +
+        "          required boolean b = 1;\n" +
+        "        }\n" +
+        "      }\n" +
+        "    }\n" +
+        "  }\n" +
+        "}", MapWithUnionValue.class);
+  }
+
+  /**
+   * Tests a complicated struct that contains required, optional, and unspecified
+   * members of a few different corner cases.
+   */
+  @Test
+  public void testMessyNestedUnions() {
+    shouldGetProjectedSchema("reqStructWithUnionV2/name", "reqStructWithUnionV2.name",
+        "message ParquetSchema {\n" +
+            "  required group reqUnionOfStructs = 2 {\n" +
+            "    optional group structV3 = 1 {\n" +
+            "      required binary name (UTF8) = 1;\n" +
+            "    }\n" +
+            "    optional group structV4 = 2 {\n" +
+            "      required binary name (UTF8) = 1;\n" +
+            "    }\n" +
+            "    optional group aNewBool = 3 {\n" +
+            "      required boolean b = 1;\n" +
+            "    }\n" +
+            "  }\n" +
+            "  required group reqNestedUnion = 5 {\n" +
+            "    optional group structV3 = 1 {\n" +
+            "      required binary name (UTF8) = 1;\n" +
+            "    }\n" +
+            "    optional group unionOfStructs = 2 {\n" +
+            "      optional group structV3 = 1 {\n" +
+            "        required binary name (UTF8) = 1;\n" +
+            "      }\n" +
+            "      optional group structV4 = 2 {\n" +
+            "        required binary name (UTF8) = 1;\n" +
+            "      }\n" +
+            "      optional group aNewBool = 3 {\n" +
+            "        required boolean b = 1;\n" +
+            "      }\n" +
+            "    }\n" +
+            "    optional group aLong = 3 {\n" +
+            "      required int64 l = 1;\n" +
+            "    }\n" +
+            "  }\n" +
+            "  required group reqStructWithUnionV2 = 8 {\n" +
+            "    required binary name (UTF8) = 1;\n" +
+            "    required group aUnion = 2 {\n" +
+            "      optional group aString = 1 {\n" +
+            "        required binary s (UTF8) = 1;\n" +
+            "      }\n" +
+            "      optional group aLong = 2 {\n" +
+            "        required int64 l = 1;\n" +
+            "      }\n" +
+            "      optional group aNewBool = 3 {\n" +
+            "        required boolean b = 1;\n" +
+            "      }\n" +
+            "    }\n" +
+            "  }\n" +
+            "  required group reqUnionStructUnion = 11 {\n" +
+            "    optional group structV3 = 1 {\n" +
+            "      required binary name (UTF8) = 1;\n" +
+            "    }\n" +
+            "    optional group structWithUnionOfStructs = 2 {\n" +
+            "      required binary name (UTF8) = 1;\n" +
+            "      required group aUnion = 2 {\n" +
+            "        optional group structV3 = 1 {\n" +
+            "          required binary name (UTF8) = 1;\n" +
+            "        }\n" +
+            "        optional group structV4 = 2 {\n" +
+            "          required binary name (UTF8) = 1;\n" +
+            "        }\n" +
+            "        optional group aNewBool = 3 {\n" +
+            "          required boolean b = 1;\n" +
+            "        }\n" +
+            "      }\n" +
+            "    }\n" +
+            "    optional group aLong = 3 {\n" +
+            "      required int64 l = 1;\n" +
+            "    }\n" +
+            "  }\n" +
+            "}", StructWithNestedUnion.class);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
index 335a7ff..ee545db 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
@@ -69,9 +69,7 @@ public class TestFieldsPath {
 
   }
 
-  private static class PrimitivePathVisitor implements ThriftType.TypeVisitor {
-    private List<String> paths = new ArrayList<String>();
-    private FieldsPath path = new FieldsPath();
+  private static class PrimitivePathVisitor implements ThriftType.TypeVisitor<List<String>,
FieldsPath> {
     private String delim;
 
     private PrimitivePathVisitor(String delim) {
@@ -80,87 +78,85 @@ public class TestFieldsPath {
 
     public static List<String> visit(StructType s, String delim) {
       PrimitivePathVisitor v = new PrimitivePathVisitor(delim);
-      s.accept(v);
-      return v.getPaths();
-    }
-
-    public List<String> getPaths() {
-      return paths;
+      return s.accept(v, new FieldsPath());
     }
 
     @Override
-    public void visit(MapType mapType) {
+    public List<String> visit(MapType mapType, FieldsPath path) {
+      List<String> ret = new ArrayList<String>();
+
       ThriftField key = mapType.getKey();
       ThriftField value = mapType.getValue();
-      path.push(key);
-      key.getType().accept(this);
-      path.pop();
-      path.push(value);
-      value.getType().accept(this);
-      path.pop();
+
+      ret.addAll(key.getType().accept(this, path.push(key)));
+      ret.addAll(value.getType().accept(this, path.push(value)));
+
+      return ret;
     }
 
     @Override
-    public void visit(SetType setType) {
-      setType.getValues().getType().accept(this);
+    public List<String> visit(SetType setType, FieldsPath path) {
+      return setType.getValues().getType().accept(this, path);
     }
 
     @Override
-    public void visit(ListType listType) {
-      listType.getValues().getType().accept(this);
+    public List<String> visit(ListType listType, FieldsPath path) {
+      return listType.getValues().getType().accept(this, path);
     }
 
     @Override
-    public void visit(StructType structType) {
+    public List<String> visit(StructType structType, FieldsPath path) {
+      List<String> ret = new ArrayList<String>();
+
       for (ThriftField child : structType.getChildren()) {
-        path.push(child);
-        child.getType().accept(this);
-        path.pop();
+        ret.addAll(child.getType().accept(this, path.push(child)));
       }
+
+      return ret;
     }
 
-    private void visitPrimitive() {
-      paths.add(path.toDelimitedString(delim));
+    private List<String> visitPrimitive(FieldsPath path) {
+      return Arrays.asList(path.toDelimitedString(delim));
     }
 
     @Override
-    public void visit(EnumType enumType) {
-      visitPrimitive();
+    public List<String> visit(EnumType enumType, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(BoolType boolType) {
-      visitPrimitive();
+    public List<String> visit(BoolType boolType, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(ByteType byteType) {
-      visitPrimitive();
+    public List<String> visit(ByteType byteType, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(DoubleType doubleType) {
-      visitPrimitive();
+    public List<String> visit(DoubleType doubleType, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(I16Type i16Type) {
-      visitPrimitive();
+    public List<String> visit(I16Type i16Type, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(I32Type i32Type) {
-      visitPrimitive();
+    public List<String> visit(I32Type i32Type, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(I64Type i64Type) {
-      visitPrimitive();
+    public List<String> visit(I64Type i64Type, FieldsPath path) {
+      return visitPrimitive(path);
     }
 
     @Override
-    public void visit(StringType stringType) {
-      visitPrimitive();
+    public List<String> visit(StringType stringType, FieldsPath path) {
+      return visitPrimitive(path);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/thrift/compat.thrift
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/thrift/compat.thrift b/parquet-thrift/src/test/thrift/compat.thrift
index 2bd8a8c..63972a8 100644
--- a/parquet-thrift/src/test/thrift/compat.thrift
+++ b/parquet-thrift/src/test/thrift/compat.thrift
@@ -18,6 +18,7 @@
  */
 
 namespace java org.apache.parquet.thrift.test.compat
+
 struct StructV1 {
   1: required string name
 }
@@ -100,6 +101,18 @@ struct MapAddRequiredStructV1{
   1: required map<AddRequiredStructV1,string> map1
 }
 
+struct MapWithStructValue {
+  1: required map<string, StructV4WithExtracStructField> reqMap
+}
+
+struct MapWithPrimMapValue {
+  1: required map<string, map<string, string>> reqMap
+}
+
+struct MapWithStructMapValue {
+  1: required map<string, map<string, StructV4WithExtracStructField>> reqMap
+}
+
 struct SetStructV1{
   1: required set<StructV1> set1
 }
@@ -159,3 +172,84 @@ struct StructWithAStructThatLooksLikeUnionV2 {
   1: required string name,
   2: required AStructThatLooksLikeUnionV2 aNotQuiteUnion
 }
+
+union UnionOfStructs {
+  1: StructV3 structV3,
+  2: StructV4WithExtracStructField structV4,
+  3: ABool aNewBool
+}
+
+struct StructWithUnionOfStructs {  
+  1: required string name,
+  2: required UnionOfStructs aUnion
+}
+
+struct StructWithOptionalUnionOfStructs {  
+  1: required string name,
+  2: optional UnionOfStructs aUnion
+}
+
+struct StructWithRequiredUnionOfStructs {  
+  1: required string name,
+  2: required UnionOfStructs aUnion
+}
+
+struct OptionalInsideRequired {
+  1: required string name,
+  2: required StructWithOptionalUnionOfStructs aStruct
+}
+
+struct RequiredInsideOptional {
+  1: required string name,
+  2: optional StructWithRequiredUnionOfStructs aStruct
+}
+
+union UnionStructUnion {
+  1: StructV3 structV3
+  2: StructWithUnionOfStructs structWithUnionOfStructs
+  3: ALong aLong 
+}
+
+union NestedUnion {
+  1: StructV3 structV3
+  2: UnionOfStructs unionOfStructs
+  3: ALong aLong 
+}
+
+union NestedNestedUnion {
+  1: NestedUnion nestedUnion
+  2: UnionV2 unionV2
+}
+
+struct StructWithNestedUnion {
+  1: optional UnionOfStructs optUnionOfStructs
+  2: required UnionOfStructs reqUnionOfStructs
+  3: UnionOfStructs unspecifiedUnionOfStructs
+  
+  4: optional NestedUnion optNestedUnion
+  5: required NestedUnion reqNestedUnion
+  6: NestedUnion unspecifiedNestedUnion
+  
+  7: optional StructWithUnionV2 optStructWithUnionV2
+  8: required StructWithUnionV2 reqStructWithUnionV2
+  9: StructWithUnionV2 unspecifiedStructWithUnionV2
+  
+  10: optional UnionStructUnion optUnionStructUnion
+  11: required UnionStructUnion reqUnionStructUnion
+  12: UnionStructUnion unspecifiedUnionStructUnion
+}
+
+struct MapWithUnionKey {
+  1: optional map<UnionOfStructs, StructV3> optMapWithUnionKey
+  2: required map<UnionOfStructs, StructV3> reqMapWithUnionKey
+}
+
+struct MapWithUnionValue {
+  1: optional map<StructV3, UnionOfStructs> optMapWithUnionValue
+  2: required map<StructV3, UnionOfStructs> reqMapWithUnionValue
+}
+
+struct ListOfUnions {
+  1: optional list<UnionOfStructs> optListUnion
+  2: required list<UnionOfStructs> reqListUnion
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet_cascading.md
----------------------------------------------------------------------
diff --git a/parquet_cascading.md b/parquet_cascading.md
index 15470e1..a1b0a68 100644
--- a/parquet_cascading.md
+++ b/parquet_cascading.md
@@ -115,7 +115,11 @@ This applies for repeated fields as well, for example `primaryAddress.otherPhone
 Maps are a special case -- the map is split into two columns, the key and the value. All
the columns in the key are required, but you can select a subset of the
 columns in the value (or skip the value entirely), for example: `otherAddresses.{key,value.street}`
will select only the streets from the
 values of the map, but the entire key will be kept. To select an entire map, you can do:
`otherAddresses.{key,value}`, 
-and to select only the keys: `otherAddresses.key`. When selecting a field that is a struct,
for example `primaryAddress.primaryPhone`, 
+and to select only the keys: `otherAddresses.key`. Similar to map keys, the values in a set
cannot be partially projected,
+you must select all the columns of the items in the set, or none of them. This is because
materializing the set wouldn't make much sense if the item's
+hashcode is dependent on the dropped columns (as with the key of a map).
+
+When selecting a field that is a struct, for example `primaryAddress.primaryPhone`, 
 it will select the entire struct. So `primaryAddress.primaryPhone.*` is redundant.
 
 Columns can be specified concretely (like `primaryAddress.primaryPhone.doNotCall`), or a
restricted glob syntax can be used.


Mime
View raw message