Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 5FB9217F2B for ; Wed, 11 Mar 2015 22:12:00 +0000 (UTC) Received: (qmail 14181 invoked by uid 500); 11 Mar 2015 22:11:54 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 14159 invoked by uid 500); 11 Mar 2015 22:11:54 -0000 Mailing-List: contact commits-help@parquet.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.incubator.apache.org Delivered-To: mailing list commits@parquet.incubator.apache.org Received: (qmail 14150 invoked by uid 99); 11 Mar 2015 22:11:54 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Mar 2015 22:11:54 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_RP_MATCHES_RCVD X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO mail.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with SMTP; Wed, 11 Mar 2015 22:11:52 +0000 Received: (qmail 13701 invoked by uid 99); 11 Mar 2015 22:11:32 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Mar 2015 22:11:32 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D166BE10A9; Wed, 11 Mar 2015 22:11:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blue@apache.org To: commits@parquet.incubator.apache.org Message-Id: <181328d875ec4ee9a62efbc0867b1997@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: incubator-parquet-mr git commit: PARQUET-172: Add parquet-thrift binary tests. 
Date: Wed, 11 Mar 2015 22:11:31 +0000 (UTC) X-Virus-Checked: Checked by ClamAV on apache.org Repository: incubator-parquet-mr Updated Branches: refs/heads/master 5acc6a550 -> 031a762d1 PARQUET-172: Add parquet-thrift binary tests. These tests validate that there is no encoding problem with parquet-thrift or parquet-scrooge. See https://github.com/laurencer/parquet-mr-bug Author: Ryan Blue Closes #145 from rdblue/PARQUET-172-add-thrift-binary-test and squashes the following commits: 6856414 [Ryan Blue] PARQUET-172: Add parquet-thrift binary tests. Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/031a762d Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/031a762d Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/031a762d Branch: refs/heads/master Commit: 031a762d105bceda2049204ba54b8f8737f359b4 Parents: 5acc6a5 Author: Ryan Blue Authored: Wed Mar 11 15:11:16 2015 -0700 Committer: Ryan Blue Committed: Wed Mar 11 15:11:16 2015 -0700 ---------------------------------------------------------------------- .../java/parquet/scrooge/ScroogeBinaryTest.java | 100 +++++++++++++++++++ .../scrooge/ScroogeStructConverterTest.java | 7 ++ parquet-scrooge/src/test/thrift/test.thrift | 4 + .../java/parquet/hadoop/thrift/TestBinary.java | 66 ++++++++++++ parquet-thrift/src/test/thrift/binary.thrift | 25 +++++ 5 files changed, 202 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java ---------------------------------------------------------------------- diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java new file mode 100644 index 0000000..19bf68c --- /dev/null +++ 
b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package parquet.scrooge; + +import java.io.File; +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import parquet.hadoop.ParquetReader; +import parquet.hadoop.ParquetWriter; +import parquet.scrooge.test.StringAndBinary; +import parquet.thrift.ThriftParquetReader; + +public class ScroogeBinaryTest { + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + @Test + public void testScroogeBinaryEncoding() throws Exception { + StringAndBinary expected = new StringAndBinary.Immutable("test", + ByteBuffer.wrap(new byte[] {-123, 20, 33})); + + File temp = tempDir.newFile(UUID.randomUUID().toString()); + temp.deleteOnExit(); + temp.delete(); + + Path path = new Path(temp.getPath()); + + ParquetWriter writer = new ParquetWriter( + path, new Configuration(), new ScroogeWriteSupport(StringAndBinary.class)); + writer.write(expected); + writer.close(); + + // read using the 
parquet-thrift version to isolate the write path + ParquetReader<parquet.thrift.test.binary.StringAndBinary> reader = ThriftParquetReader. + <parquet.thrift.test.binary.StringAndBinary>build(path) + .withThriftClass(parquet.thrift.test.binary.StringAndBinary.class) + .build(); + parquet.thrift.test.binary.StringAndBinary record = reader.read(); + reader.close(); + + Assert.assertEquals("String should match after serialization round trip", + "test", record.s); + Assert.assertEquals("ByteBuffer should match after serialization round trip", + ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b); + } + + @Test + @SuppressWarnings("unchecked") + public void testScroogeBinaryDecoding() throws Exception { + StringAndBinary expected = new StringAndBinary.Immutable("test", + ByteBuffer.wrap(new byte[] {-123, 20, 33})); + + File temp = tempDir.newFile(UUID.randomUUID().toString()); + temp.deleteOnExit(); + temp.delete(); + + Path path = new Path(temp.getPath()); + + ParquetWriter writer = new ParquetWriter( + path, new Configuration(), new ScroogeWriteSupport(StringAndBinary.class)); + writer.write(expected); + writer.close(); + + Configuration conf = new Configuration(); + conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName()); + ParquetReader<StringAndBinary> reader = ParquetReader.<StringAndBinary>
+ builder(new ScroogeReadSupport(), path) + .withConf(conf) + .build(); + StringAndBinary record = reader.read(); + reader.close(); + + Assert.assertEquals("String should match after serialization round trip", + "test", record.s()); + Assert.assertEquals("ByteBuffer should match after serialization round trip", + ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b()); + } +} http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java ---------------------------------------------------------------------- diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java index 7431c10..3dc5369 100644 --- a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java +++ b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java @@ -30,6 +30,7 @@ import parquet.scrooge.test.TestOptionalMap; import parquet.scrooge.test.TestPersonWithAllInformation; import parquet.scrooge.test.TestSetPrimitive; import parquet.scrooge.test.TestUnion; +import parquet.scrooge.test.StringAndBinary; import parquet.thrift.ThriftSchemaConverter; import parquet.thrift.struct.ThriftType; import static org.junit.Assert.assertEquals; @@ -43,7 +44,13 @@ public class ScroogeStructConverterTest { ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestMapPrimitiveKey.class); ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.TestMapPrimitiveKey.class); assertEquals(expected,scroogeMap); + } + @Test + public void testBinary() throws Exception { + ThriftType.StructType scroogeBinary = new ScroogeStructConverter().convert(StringAndBinary.class); + ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.StringAndBinary.class); + assertEquals(expected, scroogeBinary); } @Test 
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/thrift/test.thrift ---------------------------------------------------------------------- diff --git a/parquet-scrooge/src/test/thrift/test.thrift b/parquet-scrooge/src/test/thrift/test.thrift index 11c598f..a80bbb0 100644 --- a/parquet-scrooge/src/test/thrift/test.thrift +++ b/parquet-scrooge/src/test/thrift/test.thrift @@ -168,3 +168,7 @@ struct TestFieldOfEnum{ 2: optional Operation op2 } +struct StringAndBinary { + 1: required string s; + 2: required binary b; +} http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java new file mode 100644 index 0000000..36fb7e6 --- /dev/null +++ b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package parquet.hadoop.thrift; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import parquet.hadoop.ParquetReader; +import parquet.hadoop.metadata.CompressionCodecName; +import parquet.thrift.ThriftParquetReader; +import parquet.thrift.ThriftParquetWriter; +import parquet.thrift.test.binary.StringAndBinary; + +public class TestBinary { + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + @Test + public void testBinary() throws IOException { + StringAndBinary expected = new StringAndBinary("test", + ByteBuffer.wrap(new byte[] { -123, 20, 33 })); + File temp = tempDir.newFile(UUID.randomUUID().toString()); + temp.deleteOnExit(); + temp.delete(); + + Path path = new Path(temp.getPath()); + + ThriftParquetWriter writer = + new ThriftParquetWriter( + path, StringAndBinary.class, CompressionCodecName.SNAPPY); + writer.write(expected); + writer.close(); + + ParquetReader<StringAndBinary> reader = ThriftParquetReader. + <StringAndBinary>build(path) + .withThriftClass(StringAndBinary.class) + .build(); + StringAndBinary record = reader.read(); + reader.close(); + + Assert.assertEquals("Should match after serialization round trip", + expected, record); + } +} http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/thrift/binary.thrift ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/thrift/binary.thrift b/parquet-thrift/src/test/thrift/binary.thrift new file mode 100644 index 0000000..fa80424 --- /dev/null +++ b/parquet-thrift/src/test/thrift/binary.thrift @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +namespace java parquet.thrift.test.binary + +struct StringAndBinary { + 1: required string s; + 2: required binary b; +}