Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id F3206200C24 for ; Thu, 19 Jan 2017 02:27:15 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id F1CF0160B56; Thu, 19 Jan 2017 01:27:15 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id C9F2B160B5F for ; Thu, 19 Jan 2017 02:27:14 +0100 (CET) Received: (qmail 53531 invoked by uid 500); 19 Jan 2017 01:27:14 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 52756 invoked by uid 99); 19 Jan 2017 01:27:13 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 19 Jan 2017 01:27:13 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 20B8DF1736; Thu, 19 Jan 2017 01:27:13 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blue@apache.org To: commits@parquet.apache.org Date: Thu, 19 Jan 2017 01:27:37 -0000 Message-Id: <51a64fd1305e4e758c4f76c15969e642@git.apache.org> In-Reply-To: <25406da3dfe343a9a44d6bc62fd223d9@git.apache.org> References: <25406da3dfe343a9a44d6bc62fd223d9@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [26/50] [abbrv] parquet-mr git commit: PARQUET-660: Ignore extension fields in protobuf messages. archived-at: Thu, 19 Jan 2017 01:27:16 -0000 PARQUET-660: Ignore extension fields in protobuf messages. 
Currently, converting protobuf messages with extensions can result in an uninformative error or data corruption. A more detailed explanation is available in the corresponding [jira](https://issues.apache.org/jira/browse/PARQUET-660). This patch simply ignores extension fields in protobuf messages. In the long run, I'd like to add proper support for Protobuf extensions. This might take a little longer though, so I've decided to improve the current situation with this patch. Author: Jakub Kukul Closes #351 from jkukul/master and squashes the following commits: 27580ab [Jakub Kukul] PARQUET-660: Throw Unsupported exception for messages with extensions. db6e08b [Jakub Kukul] PARQUET-660: Refactor: Don't use additional variable for indexing fieldWriters. e910a8a [Jakub Kukul] PARQUET-660: Refactor: Add missing indentation. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/4beb0609 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/4beb0609 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/4beb0609 Branch: refs/heads/parquet-1.8.x Commit: 4beb06094b6e14379c6bb16ae09b7b3a47c8c5fb Parents: 6e95d45 Author: Jakub Kukul Authored: Thu Sep 8 14:48:42 2016 -0700 Committer: Ryan Blue Committed: Mon Jan 9 16:54:54 2017 -0800 ---------------------------------------------------------------------- .../parquet/proto/ProtoSchemaConverter.java | 30 ++++++++++---------- .../apache/parquet/proto/ProtoWriteSupport.java | 13 ++++++--- .../parquet/proto/ProtoWriteSupportTest.java | 15 ++++++++++ .../src/test/resources/TestProtobuf.proto | 11 +++++++ 4 files changed, 50 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java ---------------------------------------------------------------------- diff --git 
a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index 8c9685a..3f6ed6b 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -86,21 +86,21 @@ public class ProtoSchemaConverter { Type.Repetition repetition = getRepetition(descriptor); JavaType javaType = descriptor.getJavaType(); switch (javaType) { - case BOOLEAN : return builder.primitive(BOOLEAN, repetition); - case INT : return builder.primitive(INT32, repetition); - case LONG : return builder.primitive(INT64, repetition); - case FLOAT : return builder.primitive(FLOAT, repetition); - case DOUBLE: return builder.primitive(DOUBLE, repetition); - case BYTE_STRING: return builder.primitive(BINARY, repetition); - case STRING: return builder.primitive(BINARY, repetition).as(UTF8); - case MESSAGE: { - GroupBuilder> group = builder.group(repetition); - convertFields(group, descriptor.getMessageType().getFields()); - return group; - } - case ENUM: return builder.primitive(BINARY, repetition).as(ENUM); - default: - throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType); + case BOOLEAN: return builder.primitive(BOOLEAN, repetition); + case INT: return builder.primitive(INT32, repetition); + case LONG: return builder.primitive(INT64, repetition); + case FLOAT: return builder.primitive(FLOAT, repetition); + case DOUBLE: return builder.primitive(DOUBLE, repetition); + case BYTE_STRING: return builder.primitive(BINARY, repetition); + case STRING: return builder.primitive(BINARY, repetition).as(UTF8); + case MESSAGE: { + GroupBuilder> group = builder.group(repetition); + convertFields(group, descriptor.getMessageType().getFields()); + return group; + } + case ENUM: return builder.primitive(BINARY, repetition).as(ENUM); + default: + 
throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType); } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index d7f7a53..cef2b93 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -156,7 +156,6 @@ public class ProtoWriteSupport extends WriteSupport< List fields = descriptor.getFields(); fieldWriters = (FieldWriter[]) Array.newInstance(FieldWriter.class, fields.size()); - int i = 0; for (Descriptors.FieldDescriptor fieldDescriptor: fields) { String name = fieldDescriptor.getName(); Type type = schema.getType(name); @@ -169,8 +168,7 @@ public class ProtoWriteSupport extends WriteSupport< writer.setFieldName(name); writer.setIndex(schema.getFieldIndex(name)); - fieldWriters[i] = writer; - i++; + fieldWriters[fieldDescriptor.getIndex()] = writer; } } @@ -220,6 +218,13 @@ public class ProtoWriteSupport extends WriteSupport< for (Map.Entry entry : changedPbFields.entrySet()) { Descriptors.FieldDescriptor fieldDescriptor = entry.getKey(); + + if(fieldDescriptor.isExtension()) { + // Field index of an extension field might overlap with a base field. 
+ throw new UnsupportedOperationException( + "Cannot convert Protobuf message with extension field(s)"); + } + int fieldIndex = fieldDescriptor.getIndex(); fieldWriters[fieldIndex].writeField(entry.getValue()); } @@ -276,7 +281,7 @@ public class ProtoWriteSupport extends WriteSupport< } class IntWriter extends FieldWriter { - @Override + @Override final void writeRawValue(Object value) { recordConsumer.addInteger((Integer) value); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 73f7734..3a273c9 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -165,4 +165,19 @@ public class ProtoWriteSupportTest { inOrder.verify(readConsumerMock).endMessage(); Mockito.verifyNoMoreInteractions(readConsumerMock); } + + @Test(expected = UnsupportedOperationException.class) + public void testMessageWithExtensions() throws Exception { + RecordConsumer readConsumerMock = Mockito.mock(RecordConsumer.class); + ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.Vehicle.class, readConsumerMock); + + TestProtobuf.Vehicle.Builder msg = TestProtobuf.Vehicle.newBuilder(); + msg.setHorsePower(300); + // Currently there's no support for extension fields. This test tests that the extension field + // will cause an exception. 
+ msg.setExtension(TestProtobuf.Airplane.wingSpan, 50); + + instance.write(msg.build()); + } + } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/test/resources/TestProtobuf.proto ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto index afa0f63..d7cdf03 100644 --- a/parquet-protobuf/src/test/resources/TestProtobuf.proto +++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto @@ -137,3 +137,14 @@ message SecondCustomClassMessage { } //please place your unit test Protocol Buffer definitions here. + +message Vehicle { + optional int32 horsePower = 1; + extensions 100 to 199; +} + +message Airplane { + extend Vehicle { + optional int32 wingSpan = 101; + } +}