Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4A73D18238 for ; Wed, 20 May 2015 02:36:08 +0000 (UTC) Received: (qmail 92169 invoked by uid 500); 20 May 2015 02:36:08 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 92141 invoked by uid 500); 20 May 2015 02:36:08 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 92127 invoked by uid 99); 20 May 2015 02:36:08 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 20 May 2015 02:36:08 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 01ABDE35A7; Wed, 20 May 2015 02:36:07 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: alexlevenson@apache.org To: commits@parquet.apache.org Date: Wed, 20 May 2015 02:36:07 -0000 Message-Id: <7c1b4e52074c423d8d488552a102d26f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] parquet-mr git commit: PARQUET-287: Keep a least 1 column from union members when projecting thrift unions Repository: parquet-mr Updated Branches: refs/heads/master 181affd5c -> ded56ffd5 http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java new file mode 100644 index 0000000..611a1a9 --- /dev/null +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java @@ -0,0 +1,480 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.thrift; + +import org.apache.parquet.thrift.test.compat.ListOfUnions; +import org.apache.parquet.thrift.test.compat.MapWithUnionKey; +import org.apache.parquet.thrift.test.compat.MapWithUnionValue; +import org.apache.parquet.thrift.test.compat.NestedNestedUnion; +import org.apache.parquet.thrift.test.compat.NestedUnion; +import org.apache.parquet.thrift.test.compat.OptionalInsideRequired; +import org.apache.parquet.thrift.test.compat.RequiredInsideOptional; +import org.apache.parquet.thrift.test.compat.StructWithNestedUnion; +import org.apache.parquet.thrift.test.compat.StructWithOptionalUnionOfStructs; +import org.apache.parquet.thrift.test.compat.UnionOfStructs; +import org.apache.parquet.thrift.test.compat.UnionV2; +import org.junit.Test; + +import static org.apache.parquet.thrift.TestThriftSchemaConverter.shouldGetProjectedSchema; + +public class TestThriftSchemaConverterProjectUnion { + + /** + * Test projecting into a top level union + */ + @Test + public void testTopLevelUnions() { + + // very simple case, a union of structs where each struct has only 1 field + shouldGetProjectedSchema("aLong/**", "aLong", + "message ParquetSchema {\n" + + " optional group aString = 1 {\n" + + " required binary s (UTF8) = 1;\n" + + " }\n" + + " optional group aLong = 2 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + "}", UnionV2.class); + + // a union of structs, where each struct has more than one field + // we should still only get one field per child here though + shouldGetProjectedSchema("aNewBool/**", "aNewBool", + "message ParquetSchema {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + "}", UnionOfStructs.class); + } + + /** + * An optional union should be dropped if not selected. + */ + @Test + public void optionalUnionShouldBeDropped() { + shouldGetProjectedSchema("name", "name", + "message ParquetSchema {\n" + + " required binary name (UTF8) = 1;\n" + + "}", StructWithOptionalUnionOfStructs.class); + } + + /** + * An optional union inside a required struct should still be + * dropped if not selected. + */ + @Test + public void optionalUnionInRequiredStructShouldBeDropped() { + shouldGetProjectedSchema("name", "name", + "message ParquetSchema {\n" + + " required binary name (UTF8) = 1;\n" + + "}", OptionalInsideRequired.class); + } + + /** + * A required union inside of an un-selected optional struct + * should get dropped along with the optional struct. + */ + @Test + public void requiredUnionInsideOptionalStructShouldBeDropped() { + // test a required union inside an optional struct + shouldGetProjectedSchema("name", "name", + "message ParquetSchema {\n" + + " required binary name (UTF8) = 1;\n" + + "}", RequiredInsideOptional.class); + } + + /** + * A required union inside a selected optional struct should keep + * one sentinel column for each "kind" of union member. + */ + @Test + public void requiredUnionInsideOptionalStructShouldBeKeptIfParentSelected() { + shouldGetProjectedSchema("aStruct/name", "aStruct.name", + "message ParquetSchema {\n" + + " optional group aStruct = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " required group aUnion = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + "}", RequiredInsideOptional.class); + } + + /** + * Selecting only one "kind" of a union should trigger keeping one sentinel + * column from the rest of the "kinds" of the union. + */ + @Test + public void selectingOneUnionMemberKeepsSentinels() { + shouldGetProjectedSchema( + "aUnion/structV4/addedStruct/gender", + "aUnion.structV4.addedStruct.gender", + "message ParquetSchema {\n" + + " optional group aUnion = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " optional group addedStruct = 4 {\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + "}", + StructWithOptionalUnionOfStructs.class); + } + + /** + * In the case of a union inside a union (and union inside union inside union), + * even though the fields will be "optional" because of how unions + * are stored, we still need to grab sentinel columns from + * the members of the union. + */ + @Test + public void testUnionInsideUnion() { + + shouldGetProjectedSchema("structV3/age", "structV3.age", + "message ParquetSchema {\n" + + " optional group structV3 = 1 {\n" + + " optional binary age (UTF8) = 2;\n" + + " }\n" + + " optional group unionOfStructs = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group aLong = 3 {\n" + + " required int64 l = 1;\n" + + " }\n" + + "}", NestedUnion.class); + + shouldGetProjectedSchema( + "unionOfStructs/structV4/addedStruct/gender", + "unionOfStructs.structV4.addedStruct.gender", + "message ParquetSchema {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group unionOfStructs = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " optional group addedStruct = 4 {\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group aLong = 3 {\n" + + " required int64 l = 1;\n" + + " }\n" + + "}\n", NestedUnion.class); + + shouldGetProjectedSchema( + "unionV2/aLong/**", + "unionV2.aLong", + "message ParquetSchema {\n" + + " optional group nestedUnion = 1 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group unionOfStructs = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group aLong = 3 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " }\n" + + " optional group unionV2 = 2 {\n" + + " optional group aString = 1 {\n" + + " required binary s (UTF8) = 1;\n" + + " }\n" + + " optional group aLong = 2 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + "}", NestedNestedUnion.class); + + } + + @Test + public void testListOfUnions() { + + // selecting a field from an optional list of unions should also choose a sentinel field + // for the rest of the union members + // at the same time, this should also drop the required list of unions, because it is safe to + // drop even a required but repeated group + shouldGetProjectedSchema( + "optListUnion/structV3/age", + "optListUnion.structV3.age", + "message ParquetSchema {\n" + + " optional group optListUnion (LIST) = 1 {\n" + + " repeated group optListUnion_tuple {\n" + + " optional group structV3 = 1 {\n" + + " optional binary age (UTF8) = 2;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + "}", ListOfUnions.class); + + // same goes for selecting a field from a required list of unions + // and at the same time, the optional list of unions should be dropped too + shouldGetProjectedSchema( + "reqListUnion/structV3/age", + "reqListUnion.structV3.age", + "message ParquetSchema {\n" + + " required group reqListUnion (LIST) = 2 {\n" + + " repeated group reqListUnion_tuple {\n" + + " optional group structV3 = 1 {\n" + + " optional binary age (UTF8) = 2;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + "}", ListOfUnions.class); + + } + + @Test + public void testMapWithUnionKey() { + shouldGetProjectedSchema( + "optMapWithUnionKey/key/**", + "optMapWithUnionKey.key", + "message ParquetSchema {\n" + + " optional group optMapWithUnionKey (MAP) = 1 {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required group key {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " optional group addedStruct = 4 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group value {\n" + + " required binary name (UTF8) = 1;\n" + + " } " + + " }\n" + + " }\n" + + "}", MapWithUnionKey.class); + + shouldGetProjectedSchema( + "optMapWithUnionKey/key/**;optMapWithUnionKey/value/gender", + "optMapWithUnionKey.{key,value.gender}", + "message ParquetSchema {\n" + + " optional group optMapWithUnionKey (MAP) = 1 {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required group key {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " optional group addedStruct = 4 {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group value {\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " }\n" + + "}", MapWithUnionKey.class); + } + + @Test + public void testMapWithUnionValue() { + shouldGetProjectedSchema( + "optMapWithUnionValue/key/**;optMapWithUnionValue/value/structV4/addedStruct/gender", + "optMapWithUnionValue.{key,value.structV4.addedStruct.gender}", + "message ParquetSchema {\n" + + " optional group optMapWithUnionValue (MAP) = 1 {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required group key {\n" + + " required binary name (UTF8) = 1;\n" + + " optional binary age (UTF8) = 2;\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " optional group value {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " optional group addedStruct = 4 {\n" + + " optional binary gender (UTF8) = 3;\n" + + " }\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", MapWithUnionValue.class); + } + + /** + * Tests a complicated struct that contains required, optional, and unspecified + * members of a few different corner cases. + */ + @Test + public void testMessyNestedUnions() { + shouldGetProjectedSchema("reqStructWithUnionV2/name", "reqStructWithUnionV2.name", + "message ParquetSchema {\n" + + " required group reqUnionOfStructs = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " required group reqNestedUnion = 5 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group unionOfStructs = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " optional group aLong = 3 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " }\n" + + " required group reqStructWithUnionV2 = 8 {\n" + + " required binary name (UTF8) = 1;\n" + + " required group aUnion = 2 {\n" + + " optional group aString = 1 {\n" + + " required binary s (UTF8) = 1;\n" + + " }\n" + + " optional group aLong = 2 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + " required group reqUnionStructUnion = 11 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structWithUnionOfStructs = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " required group aUnion = 2 {\n" + + " optional group structV3 = 1 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group structV4 = 2 {\n" + + " required binary name (UTF8) = 1;\n" + + " }\n" + + " optional group aNewBool = 3 {\n" + + " required boolean b = 1;\n" + + " }\n" + + " }\n" + + " }\n" + + " optional group aLong = 3 {\n" + + " required int64 l = 1;\n" + + " }\n" + + " }\n" + + "}", StructWithNestedUnion.class); + } +} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java index 335a7ff..ee545db 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java @@ -69,9 +69,7 @@ public class TestFieldsPath { } - private static class PrimitivePathVisitor implements ThriftType.TypeVisitor { - private List paths = new ArrayList(); - private FieldsPath path = new FieldsPath(); + private static class PrimitivePathVisitor implements ThriftType.TypeVisitor, FieldsPath> { private String delim; private PrimitivePathVisitor(String delim) { @@ -80,87 +78,85 @@ public class TestFieldsPath { public static List visit(StructType s, String delim) { PrimitivePathVisitor v = new PrimitivePathVisitor(delim); - s.accept(v); - return v.getPaths(); - } - - public List getPaths() { - return paths; + return s.accept(v, new FieldsPath()); } @Override - public void visit(MapType mapType) { + public List visit(MapType mapType, FieldsPath path) { + List ret = new ArrayList(); + ThriftField key = mapType.getKey(); ThriftField value = mapType.getValue(); - path.push(key); - key.getType().accept(this); - path.pop(); - path.push(value); - value.getType().accept(this); - path.pop(); + + ret.addAll(key.getType().accept(this, path.push(key))); + ret.addAll(value.getType().accept(this, path.push(value))); + + return ret; } @Override - public void visit(SetType setType) { - setType.getValues().getType().accept(this); + public List visit(SetType setType, FieldsPath path) { + return setType.getValues().getType().accept(this, path); } @Override - public void visit(ListType listType) { - listType.getValues().getType().accept(this); + public List visit(ListType listType, FieldsPath path) { + return listType.getValues().getType().accept(this, path); } @Override - public void visit(StructType structType) { + public List visit(StructType structType, FieldsPath path) { + List ret = new ArrayList(); + for (ThriftField child : structType.getChildren()) { - path.push(child); - child.getType().accept(this); - path.pop(); + ret.addAll(child.getType().accept(this, path.push(child))); } + + return ret; } - private void visitPrimitive() { - paths.add(path.toDelimitedString(delim)); + private List visitPrimitive(FieldsPath path) { + return Arrays.asList(path.toDelimitedString(delim)); } @Override - public void visit(EnumType enumType) { - visitPrimitive(); + public List visit(EnumType enumType, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(BoolType boolType) { - visitPrimitive(); + public List visit(BoolType boolType, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(ByteType byteType) { - visitPrimitive(); + public List visit(ByteType byteType, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(DoubleType doubleType) { - visitPrimitive(); + public List visit(DoubleType doubleType, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(I16Type i16Type) { - visitPrimitive(); + public List visit(I16Type i16Type, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(I32Type i32Type) { - visitPrimitive(); + public List visit(I32Type i32Type, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(I64Type i64Type) { - visitPrimitive(); + public List visit(I64Type i64Type, FieldsPath path) { + return visitPrimitive(path); } @Override - public void visit(StringType stringType) { - visitPrimitive(); + public List visit(StringType stringType, FieldsPath path) { + return visitPrimitive(path); } } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet-thrift/src/test/thrift/compat.thrift ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/thrift/compat.thrift b/parquet-thrift/src/test/thrift/compat.thrift index 2bd8a8c..63972a8 100644 --- a/parquet-thrift/src/test/thrift/compat.thrift +++ b/parquet-thrift/src/test/thrift/compat.thrift @@ -18,6 +18,7 @@ */ namespace java org.apache.parquet.thrift.test.compat + struct StructV1 { 1: required string name } @@ -100,6 +101,18 @@ struct MapAddRequiredStructV1{ 1: required map map1 } +struct MapWithStructValue { + 1: required map reqMap +} + +struct MapWithPrimMapValue { + 1: required map> reqMap +} + +struct MapWithStructMapValue { + 1: required map> reqMap +} + struct SetStructV1{ 1: required set set1 } @@ -159,3 +172,84 @@ struct StructWithAStructThatLooksLikeUnionV2 { 1: required string name, 2: required AStructThatLooksLikeUnionV2 aNotQuiteUnion } + +union UnionOfStructs { + 1: StructV3 structV3, + 2: StructV4WithExtracStructField structV4, + 3: ABool aNewBool +} + +struct StructWithUnionOfStructs { + 1: required string name, + 2: required UnionOfStructs aUnion +} + +struct StructWithOptionalUnionOfStructs { + 1: required string name, + 2: optional UnionOfStructs aUnion +} + +struct StructWithRequiredUnionOfStructs { + 1: required string name, + 2: required UnionOfStructs aUnion +} + +struct OptionalInsideRequired { + 1: required string name, + 2: required StructWithOptionalUnionOfStructs aStruct +} + +struct RequiredInsideOptional { + 1: required string name, + 2: optional StructWithRequiredUnionOfStructs aStruct +} + +union UnionStructUnion { + 1: StructV3 structV3 + 2: StructWithUnionOfStructs structWithUnionOfStructs + 3: ALong aLong +} + +union NestedUnion { + 1: StructV3 structV3 + 2: UnionOfStructs unionOfStructs + 3: ALong aLong +} + +union NestedNestedUnion { + 1: NestedUnion nestedUnion + 2: UnionV2 unionV2 +} + +struct StructWithNestedUnion { + 1: optional UnionOfStructs optUnionOfStructs + 2: required UnionOfStructs reqUnionOfStructs + 3: UnionOfStructs unspecifiedUnionOfStructs + + 4: optional NestedUnion optNestedUnion + 5: required NestedUnion reqNestedUnion + 6: NestedUnion unspecifiedNestedUnion + + 7: optional StructWithUnionV2 optStructWithUnionV2 + 8: required StructWithUnionV2 reqStructWithUnionV2 + 9: StructWithUnionV2 unspecifiedStructWithUnionV2 + + 10: optional UnionStructUnion optUnionStructUnion + 11: required UnionStructUnion reqUnionStructUnion + 12: UnionStructUnion unspecifiedUnionStructUnion +} + +struct MapWithUnionKey { + 1: optional map optMapWithUnionKey + 2: required map reqMapWithUnionKey +} + +struct MapWithUnionValue { + 1: optional map optMapWithUnionValue + 2: required map reqMapWithUnionValue +} + +struct ListOfUnions { + 1: optional list optListUnion + 2: required list reqListUnion +} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ded56ffd/parquet_cascading.md ---------------------------------------------------------------------- diff --git a/parquet_cascading.md b/parquet_cascading.md index 15470e1..a1b0a68 100644 --- a/parquet_cascading.md +++ b/parquet_cascading.md @@ -115,7 +115,11 @@ This applies for repeated fields as well, for example `primaryAddress.otherPhone Maps are a special case -- the map is split into two columns, the key and the value. All the columns in the key are required, but you can select a subset of the columns in the value (or skip the value entirely), for example: `otherAddresses.{key,value.street}` will select only the streets from the values of the map, but the entire key will be kept. To select an entire map, you can do: `otherAddresses.{key,value}`, -and to select only the keys: `otherAddresses.key`. When selecting a field that is a struct, for example `primaryAddress.primaryPhone`, +and to select only the keys: `otherAddresses.key`. Similar to map keys, the values in a set cannot be partially projected, +you must select all the columns of the items in the set, or none of them. This is because materializing the set wouldn't make much sense if the item's +hashcode is dependent on the dropped columns (as with the key of a map). + +When selecting a field that is a struct, for example `primaryAddress.primaryPhone`, it will select the entire struct. So `primaryAddress.primaryPhone.*` is redundant. Columns can be specified concretely (like `primaryAddress.primaryPhone.doNotCall`), or a restricted glob syntax can be used.