Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 86F20176AF for ; Mon, 27 Apr 2015 23:11:59 +0000 (UTC) Received: (qmail 48227 invoked by uid 500); 27 Apr 2015 23:11:59 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 48170 invoked by uid 500); 27 Apr 2015 23:11:59 -0000 Mailing-List: contact commits-help@parquet.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.incubator.apache.org Delivered-To: mailing list commits@parquet.incubator.apache.org Received: (qmail 47439 invoked by uid 99); 27 Apr 2015 23:11:58 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Apr 2015 23:11:58 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id AA853E17E9; Mon, 27 Apr 2015 23:11:58 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blue@apache.org To: commits@parquet.apache.org Date: Mon, 27 Apr 2015 23:12:19 -0000 Message-Id: <30f53adc96aa409893e1f081722ac75b@git.apache.org> In-Reply-To: <190ba0de36204468a8a5a0e9d143ae5d@git.apache.org> References: <190ba0de36204468a8a5a0e9d143ae5d@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [22/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet. http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/delta/benchmark/SmallRangeWritingBenchmarkTest.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/delta/benchmark/SmallRangeWritingBenchmarkTest.java b/parquet-column/src/test/java/parquet/column/values/delta/benchmark/SmallRangeWritingBenchmarkTest.java deleted file mode 100644 index 39cf0a7..0000000 --- a/parquet-column/src/test/java/parquet/column/values/delta/benchmark/SmallRangeWritingBenchmarkTest.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.delta.benchmark; - -import com.carrotsearch.junitbenchmarks.BenchmarkOptions; -import com.carrotsearch.junitbenchmarks.annotation.AxisRange; -import com.carrotsearch.junitbenchmarks.annotation.BenchmarkMethodChart; -import org.junit.BeforeClass; -import org.junit.Test; -import parquet.column.values.ValuesWriter; -import parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter; -import java.util.Random; - -@AxisRange(min = 0, max = 2) -@BenchmarkMethodChart(filePrefix = "benchmark-encoding-writing-random-small") -public class SmallRangeWritingBenchmarkTest extends RandomWritingBenchmarkTest { - @BeforeClass - public static void prepare() { - Random random=new Random(); - data = new int[100000 * blockSize]; - for (int i = 0; i < data.length; i++) { - data[i] = random.nextInt(2) - 1; - } - } - - @BenchmarkOptions(benchmarkRounds = 10, warmupRounds = 2) - @Test - public void writeRLEWithSmallBitWidthTest(){ - ValuesWriter writer = new RunLengthBitPackingHybridValuesWriter(2, 100, 20000); - runWriteTest(writer); - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java b/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java deleted file mode 100644 index 38c5b52..0000000 --- a/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.deltalengthbytearray; - -import java.io.IOException; - -import org.junit.Test; -import org.junit.Assert; - -import parquet.column.values.Utils; -import parquet.column.values.ValuesReader; -import parquet.column.values.delta.DeltaBinaryPackingValuesReader; -import parquet.io.api.Binary; - -public class TestDeltaLengthByteArray { - - String[] values = { "parquet", "hadoop", "mapreduce"}; - - @Test - public void testSerialization () throws IOException { - DeltaLengthByteArrayValuesWriter writer = new DeltaLengthByteArrayValuesWriter(64 * 1024, 64 * 1024); - DeltaLengthByteArrayValuesReader reader = new DeltaLengthByteArrayValuesReader(); - - Utils.writeData(writer, values); - Binary[] bin = Utils.readData(reader, writer.getBytes().toByteArray(), values.length); - - for(int i =0; i< bin.length ; i++) { - Assert.assertEquals(Binary.fromString(values[i]), bin[i]); - } - } - - @Test - public void testRandomStrings() throws IOException { - DeltaLengthByteArrayValuesWriter writer = new DeltaLengthByteArrayValuesWriter(64 * 1024, 64 * 1024); - DeltaLengthByteArrayValuesReader reader = new DeltaLengthByteArrayValuesReader(); - - String[] values = Utils.getRandomStringSamples(1000, 32); - Utils.writeData(writer, values); - Binary[] bin = Utils.readData(reader, writer.getBytes().toByteArray(), values.length); - - for(int i =0; i< bin.length ; i++) { - Assert.assertEquals(Binary.fromString(values[i]), bin[i]); - } - } - - @Test - public void testLengths() throws IOException { - DeltaLengthByteArrayValuesWriter writer = new DeltaLengthByteArrayValuesWriter(64 * 1024, 64 * 1024); - ValuesReader reader = new DeltaBinaryPackingValuesReader(); - - Utils.writeData(writer, values); - int[] bin = Utils.readInts(reader, writer.getBytes().toByteArray(), values.length); - - for(int i =0; i< bin.length ; i++) { - Assert.assertEquals(values[i].length(), bin[i]); - } - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/benchmark/BenchmarkDeltaLengthByteArray.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/benchmark/BenchmarkDeltaLengthByteArray.java b/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/benchmark/BenchmarkDeltaLengthByteArray.java deleted file mode 100644 index c74db12..0000000 --- a/parquet-column/src/test/java/parquet/column/values/deltalengthbytearray/benchmark/BenchmarkDeltaLengthByteArray.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.deltalengthbytearray.benchmark; - -import java.io.IOException; - -import org.junit.Rule; -import org.junit.Test; - -import parquet.column.values.Utils; -import parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesReader; -import parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesWriter; -import parquet.column.values.plain.BinaryPlainValuesReader; -import parquet.column.values.plain.PlainValuesWriter; -import parquet.io.api.Binary; - -import com.carrotsearch.junitbenchmarks.BenchmarkOptions; -import com.carrotsearch.junitbenchmarks.BenchmarkRule; -import com.carrotsearch.junitbenchmarks.annotation.AxisRange; -import com.carrotsearch.junitbenchmarks.annotation.BenchmarkMethodChart; - -@AxisRange(min = 0, max = 1) -@BenchmarkMethodChart(filePrefix = "benchmark-encoding-writing-random") -public class BenchmarkDeltaLengthByteArray { - - @Rule - public org.junit.rules.TestRule benchmarkRun = new BenchmarkRule(); - - String[] values = Utils.getRandomStringSamples(1000000, 32); - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException { - PlainValuesWriter writer = new PlainValuesWriter(64 * 1024, 64 * 1024); - BinaryPlainValuesReader reader = new BinaryPlainValuesReader(); - - Utils.writeData(writer, values); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException { - DeltaLengthByteArrayValuesWriter writer = new DeltaLengthByteArrayValuesWriter(64 * 1024, 64 * 1024); - DeltaLengthByteArrayValuesReader reader = new DeltaLengthByteArrayValuesReader(); - - Utils.writeData(writer, values); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } - -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/deltastrings/TestDeltaByteArray.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/deltastrings/TestDeltaByteArray.java b/parquet-column/src/test/java/parquet/column/values/deltastrings/TestDeltaByteArray.java deleted file mode 100644 index 24dee24..0000000 --- a/parquet-column/src/test/java/parquet/column/values/deltastrings/TestDeltaByteArray.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.deltastrings; - -import java.io.IOException; - -import org.junit.Test; -import org.junit.Assert; - -import parquet.column.values.Utils; -import parquet.column.values.ValuesReader; -import parquet.column.values.delta.DeltaBinaryPackingValuesReader; -import parquet.io.api.Binary; - -public class TestDeltaByteArray { - - static String[] values = {"parquet-mr", "parquet", "parquet-format"}; - static String[] randvalues = Utils.getRandomStringSamples(10000, 32); - - @Test - public void testSerialization () throws IOException { - DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024); - DeltaByteArrayReader reader = new DeltaByteArrayReader(); - - Utils.writeData(writer, values); - Binary[] bin = Utils.readData(reader, writer.getBytes().toByteArray(), values.length); - - for(int i =0; i< bin.length ; i++) { - Assert.assertEquals(Binary.fromString(values[i]), bin[i]); - } - } - - @Test - public void testRandomStrings() throws IOException { - DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024); - DeltaByteArrayReader reader = new DeltaByteArrayReader(); - - Utils.writeData(writer, randvalues); - Binary[] bin = Utils.readData(reader, writer.getBytes().toByteArray(), randvalues.length); - - for(int i =0; i< bin.length ; i++) { - Assert.assertEquals(Binary.fromString(randvalues[i]), bin[i]); - } - } - - @Test - public void testLengths() throws IOException { - DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024); - ValuesReader reader = new DeltaBinaryPackingValuesReader(); - - Utils.writeData(writer, values); - byte[] data = writer.getBytes().toByteArray(); - int[] bin = Utils.readInts(reader, data, values.length); - - // test prefix lengths - Assert.assertEquals(0, bin[0]); - Assert.assertEquals(7, bin[1]); - Assert.assertEquals(7, bin[2]); - - int offset = reader.getNextOffset(); - reader = new DeltaBinaryPackingValuesReader(); - bin = Utils.readInts(reader, writer.getBytes().toByteArray(), offset, values.length); - // test suffix lengths - Assert.assertEquals(10, bin[0]); - Assert.assertEquals(0, bin[1]); - Assert.assertEquals(7, bin[2]); - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/deltastrings/benchmark/BenchmarkDeltaByteArray.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/deltastrings/benchmark/BenchmarkDeltaByteArray.java b/parquet-column/src/test/java/parquet/column/values/deltastrings/benchmark/BenchmarkDeltaByteArray.java deleted file mode 100644 index 2c67bde..0000000 --- a/parquet-column/src/test/java/parquet/column/values/deltastrings/benchmark/BenchmarkDeltaByteArray.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.deltastrings.benchmark; - -import java.io.IOException; -import java.util.Arrays; - -import org.junit.Rule; -import org.junit.Test; - -import parquet.column.values.Utils; -import parquet.column.values.deltastrings.DeltaByteArrayReader; -import parquet.column.values.deltastrings.DeltaByteArrayWriter; -import parquet.column.values.plain.BinaryPlainValuesReader; -import parquet.column.values.plain.PlainValuesWriter; -import parquet.io.api.Binary; - -import com.carrotsearch.junitbenchmarks.BenchmarkOptions; -import com.carrotsearch.junitbenchmarks.BenchmarkRule; -import com.carrotsearch.junitbenchmarks.annotation.AxisRange; -import com.carrotsearch.junitbenchmarks.annotation.BenchmarkMethodChart; - -@AxisRange(min = 0, max = 1) -@BenchmarkMethodChart(filePrefix = "benchmark-encoding-writing-random") -public class BenchmarkDeltaByteArray { - - @Rule - public org.junit.rules.TestRule benchmarkRun = new BenchmarkRule(); - - static String[] values = Utils.getRandomStringSamples(1000000, 32); - static String[] sortedVals; - static - { - sortedVals = Arrays.copyOf(values, values.length); - Arrays.sort(sortedVals); - } - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException { - PlainValuesWriter writer = new PlainValuesWriter(64 * 1024, 64 * 1024); - BinaryPlainValuesReader reader = new BinaryPlainValuesReader(); - - Utils.writeData(writer, values); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException { - DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024); - DeltaByteArrayReader reader = new DeltaByteArrayReader(); - - Utils.writeData(writer, values); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkSortedStringsWithPlainValuesWriter() throws IOException { - PlainValuesWriter writer = new PlainValuesWriter(64 * 1024, 64 * 1024); - BinaryPlainValuesReader reader = new BinaryPlainValuesReader(); - - Utils.writeData(writer, sortedVals); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } - - @BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4) - @Test - public void benchmarkSortedStringsWithDeltaLengthByteArrayValuesWriter() throws IOException { - DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024); - DeltaByteArrayReader reader = new DeltaByteArrayReader(); - - Utils.writeData(writer, sortedVals); - byte [] data = writer.getBytes().toByteArray(); - Binary[] bin = Utils.readData(reader, data, values.length); - System.out.println("size " + data.length); - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/dictionary/TestDictionary.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/dictionary/TestDictionary.java b/parquet-column/src/test/java/parquet/column/values/dictionary/TestDictionary.java deleted file mode 100644 index 93f896d..0000000 --- a/parquet-column/src/test/java/parquet/column/values/dictionary/TestDictionary.java +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.dictionary; - -import static org.junit.Assert.assertEquals; -import static parquet.column.Encoding.PLAIN; -import static parquet.column.Encoding.PLAIN_DICTIONARY; -import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; -import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; -import static parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; -import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; - -import java.io.IOException; - -import org.junit.Assert; -import org.junit.Test; - -import parquet.bytes.BytesInput; -import parquet.column.ColumnDescriptor; -import parquet.column.Dictionary; -import parquet.column.Encoding; -import parquet.column.page.DictionaryPage; -import parquet.column.values.ValuesReader; -import parquet.column.values.ValuesWriter; -import parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter; -import parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter; -import parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter; -import parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter; -import parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter; -import parquet.column.values.fallback.FallbackValuesWriter; -import parquet.column.values.plain.BinaryPlainValuesReader; -import parquet.column.values.plain.PlainValuesReader; -import parquet.column.values.plain.PlainValuesWriter; -import parquet.io.api.Binary; -import parquet.schema.PrimitiveType.PrimitiveTypeName; - -public class TestDictionary { - - private FallbackValuesWriter plainFallBack(I dvw, int initialSize) { - return FallbackValuesWriter.of(dvw, new PlainValuesWriter(initialSize, initialSize * 5)); - } - - private FallbackValuesWriter newPlainBinaryDictionaryValuesWriter(int maxDictionaryByteSize, int initialSize) { - return plainFallBack(new PlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY), initialSize); - } - - private FallbackValuesWriter newPlainLongDictionaryValuesWriter(int maxDictionaryByteSize, int initialSize) { - return plainFallBack(new PlainLongDictionaryValuesWriter(maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY), initialSize); - } - - private FallbackValuesWriter newPlainIntegerDictionaryValuesWriter(int maxDictionaryByteSize, int initialSize) { - return plainFallBack(new PlainIntegerDictionaryValuesWriter(maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY), initialSize); - } - - private FallbackValuesWriter newPlainDoubleDictionaryValuesWriter(int maxDictionaryByteSize, int initialSize) { - return plainFallBack(new PlainDoubleDictionaryValuesWriter(maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY), initialSize); - } - - private FallbackValuesWriter newPlainFloatDictionaryValuesWriter(int maxDictionaryByteSize, int initialSize) { - return plainFallBack(new PlainFloatDictionaryValuesWriter(maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY), initialSize); - } - - @Test - public void testBinaryDictionary() throws IOException { - int COUNT = 100; - ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(200, 10000); - writeRepeated(COUNT, cw, "a"); - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - writeRepeated(COUNT, cw, "b"); - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - // now we will fall back - writeDistinct(COUNT, cw, "c"); - BytesInput bytes3 = getBytesAndCheckEncoding(cw, PLAIN); - - DictionaryValuesReader cr = initDicReader(cw, BINARY); - checkRepeated(COUNT, bytes1, cr, "a"); - checkRepeated(COUNT, bytes2, cr, "b"); - BinaryPlainValuesReader cr2 = new BinaryPlainValuesReader(); - checkDistinct(COUNT, bytes3, cr2, "c"); - } - - @Test - public void testBinaryDictionaryFallBack() throws IOException { - int slabSize = 100; - int maxDictionaryByteSize = 50; - final ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, slabSize); - int fallBackThreshold = maxDictionaryByteSize; - int dataSize=0; - for (long i = 0; i < 100; i++) { - Binary binary = Binary.fromString("str" + i); - cw.writeBytes(binary); - dataSize += (binary.length() + 4); - if (dataSize < fallBackThreshold) { - assertEquals(PLAIN_DICTIONARY, cw.getEncoding()); - } else { - assertEquals(PLAIN, cw.getEncoding()); - } - } - - //Fallbacked to Plain encoding, therefore use PlainValuesReader to read it back - ValuesReader reader = new BinaryPlainValuesReader(); - reader.initFromPage(100, cw.getBytes().toByteArray(), 0); - - for (long i = 0; i < 100; i++) { - assertEquals(Binary.fromString("str" + i), reader.readBytes()); - } - - //simulate cutting the page - cw.reset(); - assertEquals(0, cw.getBufferedSize()); - } - - @Test - public void testBinaryDictionaryChangedValues() throws IOException { - int COUNT = 100; - ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(200, 10000); - writeRepeatedWithReuse(COUNT, cw, "a"); - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - writeRepeatedWithReuse(COUNT, cw, "b"); - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - // now we will fall back - writeDistinct(COUNT, cw, "c"); - BytesInput bytes3 = getBytesAndCheckEncoding(cw, PLAIN); - - DictionaryValuesReader cr = initDicReader(cw, BINARY); - checkRepeated(COUNT, bytes1, cr, "a"); - checkRepeated(COUNT, bytes2, cr, "b"); - BinaryPlainValuesReader cr2 = new BinaryPlainValuesReader(); - checkDistinct(COUNT, bytes3, cr2, "c"); - } - - @Test - public void testFirstPageFallBack() throws IOException { - int COUNT = 1000; - ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(10000, 10000); - writeDistinct(COUNT, cw, "a"); - // not efficient so falls back - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN); - writeRepeated(COUNT, cw, "b"); - // still plain because we fell back on first page - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN); - - ValuesReader cr = new BinaryPlainValuesReader(); - checkDistinct(COUNT, bytes1, cr, "a"); - checkRepeated(COUNT, bytes2, cr, "b"); - - } - - @Test - public void testSecondPageFallBack() throws IOException { - int COUNT = 1000; - ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(1000, 10000); - writeRepeated(COUNT, cw, "a"); - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - writeDistinct(COUNT, cw, "b"); - // not efficient so falls back - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN); - writeRepeated(COUNT, cw, "a"); - // still plain because we fell back on previous page - BytesInput bytes3 = getBytesAndCheckEncoding(cw, PLAIN); - - ValuesReader cr = initDicReader(cw, BINARY); - checkRepeated(COUNT, bytes1, cr, "a"); - cr = new BinaryPlainValuesReader(); - checkDistinct(COUNT, bytes2, cr, "b"); - checkRepeated(COUNT, bytes3, cr, "a"); - } - - @Test - public void testLongDictionary() throws IOException { - int COUNT = 1000; - int COUNT2 = 2000; - final FallbackValuesWriter cw = newPlainLongDictionaryValuesWriter(10000, 10000); - for (long i = 0; i < COUNT; i++) { - cw.writeLong(i % 50); - } - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - for (long i = COUNT2; i > 0; i--) { - cw.writeLong(i % 50); - } - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - DictionaryValuesReader cr = initDicReader(cw, PrimitiveTypeName.INT64); - - cr.initFromPage(COUNT, bytes1.toByteArray(), 0); - for (long i = 0; i < COUNT; i++) { - long back = cr.readLong(); - assertEquals(i % 50, back); - } - - cr.initFromPage(COUNT2, bytes2.toByteArray(), 0); - for (long i = COUNT2; i > 0; i--) { - long back = cr.readLong(); - assertEquals(i % 50, back); - } - } - - private void roundTripLong(FallbackValuesWriter cw, ValuesReader reader, int maxDictionaryByteSize) throws IOException { - int fallBackThreshold = maxDictionaryByteSize / 8; - for (long i = 0; i < 100; i++) { - cw.writeLong(i); - if (i < fallBackThreshold) { - assertEquals(cw.getEncoding(), PLAIN_DICTIONARY); - } else { - assertEquals(cw.getEncoding(), PLAIN); - } - } - - reader.initFromPage(100, cw.getBytes().toByteArray(), 0); - - for (long i = 0; i < 100; i++) { - assertEquals(i, reader.readLong()); - } - } - - @Test - public void testLongDictionaryFallBack() throws IOException { - int slabSize = 100; - int maxDictionaryByteSize = 50; - final FallbackValuesWriter cw = newPlainLongDictionaryValuesWriter(maxDictionaryByteSize, slabSize); - // Fallbacked to Plain encoding, therefore use PlainValuesReader to read it back - ValuesReader reader = new PlainValuesReader.LongPlainValuesReader(); - - roundTripLong(cw, reader, maxDictionaryByteSize); - //simulate cutting the page - cw.reset(); - assertEquals(0,cw.getBufferedSize()); - cw.resetDictionary(); - - roundTripLong(cw, reader, maxDictionaryByteSize); - } - - @Test - public void testDoubleDictionary() throws IOException { - - int COUNT = 1000; - int COUNT2 = 2000; - final FallbackValuesWriter cw = newPlainDoubleDictionaryValuesWriter(10000, 10000); - - for (double i = 0; i < COUNT; i++) { - cw.writeDouble(i % 50); - } - - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - for (double i = COUNT2; i > 0; i--) { - cw.writeDouble(i % 50); - } - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - final DictionaryValuesReader cr = initDicReader(cw, DOUBLE); - - cr.initFromPage(COUNT, bytes1.toByteArray(), 0); - for (double i = 0; i < COUNT; i++) { - double back = cr.readDouble(); - assertEquals(i % 50, back, 0.0); - } - - cr.initFromPage(COUNT2, bytes2.toByteArray(), 0); - for (double i = COUNT2; i > 0; i--) { - double back = cr.readDouble(); - assertEquals(i % 50, back, 0.0); - } - - } - - private void roundTripDouble(FallbackValuesWriter cw, ValuesReader reader, int maxDictionaryByteSize) throws IOException { - int fallBackThreshold = maxDictionaryByteSize / 8; - for (double i = 0; i < 100; i++) { - cw.writeDouble(i); - if (i < fallBackThreshold) { - assertEquals(cw.getEncoding(), PLAIN_DICTIONARY); - } else { - assertEquals(cw.getEncoding(), PLAIN); - } - } - - reader.initFromPage(100, cw.getBytes().toByteArray(), 0); - - for (double i = 0; i < 100; i++) { - assertEquals(i, reader.readDouble(), 0.00001); - } - } - - @Test - public void testDoubleDictionaryFallBack() throws IOException { - int slabSize = 100; - int maxDictionaryByteSize = 50; - final FallbackValuesWriter cw = newPlainDoubleDictionaryValuesWriter(maxDictionaryByteSize, slabSize); - - // Fallbacked to Plain encoding, therefore use PlainValuesReader to read it back - ValuesReader reader = new PlainValuesReader.DoublePlainValuesReader(); - - roundTripDouble(cw, reader, maxDictionaryByteSize); - //simulate cutting the page - cw.reset(); - assertEquals(0,cw.getBufferedSize()); - cw.resetDictionary(); - - roundTripDouble(cw, reader, maxDictionaryByteSize); - } - - @Test - public void testIntDictionary() throws IOException { - - int COUNT = 2000; - int COUNT2 = 4000; - final FallbackValuesWriter cw = newPlainIntegerDictionaryValuesWriter(10000, 10000); - - for (int i = 0; i < COUNT; i++) { - cw.writeInteger(i % 50); - } - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - for (int i = COUNT2; i > 0; i--) { - cw.writeInteger(i % 50); - } - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - DictionaryValuesReader cr = initDicReader(cw, INT32); - - cr.initFromPage(COUNT, bytes1.toByteArray(), 0); - for (int i = 0; i < COUNT; i++) { - int back = cr.readInteger(); - assertEquals(i % 50, back); - } - - cr.initFromPage(COUNT2, bytes2.toByteArray(), 0); - for (int i = COUNT2; i > 0; i--) { - int back = cr.readInteger(); - assertEquals(i % 50, back); - } - - } - - private void roundTripInt(FallbackValuesWriter cw, ValuesReader reader, int maxDictionaryByteSize) throws IOException { - int fallBackThreshold = maxDictionaryByteSize / 4; - for (int i = 0; i < 100; i++) { - cw.writeInteger(i); - if (i < fallBackThreshold) { - assertEquals(cw.getEncoding(), PLAIN_DICTIONARY); - } else { - assertEquals(cw.getEncoding(), PLAIN); - } - } - - reader.initFromPage(100, cw.getBytes().toByteArray(), 0); - - for (int i = 0; i < 100; i++) { - assertEquals(i, reader.readInteger()); - } - } - - @Test - public void testIntDictionaryFallBack() throws IOException { - int slabSize = 100; - int maxDictionaryByteSize = 50; - final FallbackValuesWriter cw = newPlainIntegerDictionaryValuesWriter(maxDictionaryByteSize, slabSize); - - // Fallbacked to Plain encoding, therefore use PlainValuesReader to read it back - ValuesReader reader = new PlainValuesReader.IntegerPlainValuesReader(); - - roundTripInt(cw, reader, maxDictionaryByteSize); - //simulate cutting the page - cw.reset(); - assertEquals(0,cw.getBufferedSize()); - cw.resetDictionary(); - - roundTripInt(cw, reader, maxDictionaryByteSize); - } - - @Test - public void testFloatDictionary() throws IOException { - - int COUNT = 2000; - int COUNT2 = 4000; - final FallbackValuesWriter cw = newPlainFloatDictionaryValuesWriter(10000, 10000); - - for (float i = 0; i < COUNT; i++) { - cw.writeFloat(i % 50); - } - BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - for (float i = COUNT2; i > 0; i--) { - cw.writeFloat(i % 50); - } - BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - assertEquals(50, cw.initialWriter.getDictionarySize()); - - DictionaryValuesReader cr = initDicReader(cw, FLOAT); - - cr.initFromPage(COUNT, bytes1.toByteArray(), 0); - for (float i = 0; i < COUNT; i++) { - float back = cr.readFloat(); - assertEquals(i % 50, back, 0.0f); - } - - cr.initFromPage(COUNT2, bytes2.toByteArray(), 0); - for (float i = COUNT2; i > 0; i--) { - float back = cr.readFloat(); - assertEquals(i % 50, back, 0.0f); - } - - } - - private void roundTripFloat(FallbackValuesWriter cw, ValuesReader reader, int maxDictionaryByteSize) throws IOException { - int fallBackThreshold = maxDictionaryByteSize / 4; - for (float i = 0; i < 100; i++) { - cw.writeFloat(i); - if (i < fallBackThreshold) { - assertEquals(cw.getEncoding(), PLAIN_DICTIONARY); - } else { - assertEquals(cw.getEncoding(), PLAIN); - } - } - - reader.initFromPage(100, cw.getBytes().toByteArray(), 0); - - for (float i = 0; i < 100; i++) { - assertEquals(i, reader.readFloat(), 0.00001); - } - } - - @Test - public void testFloatDictionaryFallBack() throws IOException { - int slabSize = 100; - int maxDictionaryByteSize = 50; - final FallbackValuesWriter cw = newPlainFloatDictionaryValuesWriter(maxDictionaryByteSize, slabSize); - - // Fallbacked to Plain encoding, therefore use PlainValuesReader to read it back - ValuesReader reader = new PlainValuesReader.FloatPlainValuesReader(); - - roundTripFloat(cw, reader, maxDictionaryByteSize); - //simulate cutting the page - cw.reset(); - assertEquals(0,cw.getBufferedSize()); - cw.resetDictionary(); - - roundTripFloat(cw, reader, maxDictionaryByteSize); - } - - @Test - public void testZeroValues() throws IOException { - FallbackValuesWriter cw = newPlainIntegerDictionaryValuesWriter(100, 100); - cw.writeInteger(34); - cw.writeInteger(34); - getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); - DictionaryValuesReader reader = initDicReader(cw, INT32); - - // pretend there are 100 nulls. what matters is offset = bytes.length. - byte[] bytes = {0x00, 0x01, 0x02, 0x03}; // data doesn't matter - int offset = bytes.length; - reader.initFromPage(100, bytes, offset); - } - - private DictionaryValuesReader initDicReader(ValuesWriter cw, PrimitiveTypeName type) - throws IOException { - final DictionaryPage dictionaryPage = cw.createDictionaryPage().copy(); - final ColumnDescriptor descriptor = new ColumnDescriptor(new String[] {"foo"}, type, 0, 0); - final Dictionary dictionary = PLAIN.initDictionary(descriptor, dictionaryPage); - final DictionaryValuesReader cr = new DictionaryValuesReader(dictionary); - return cr; - } - - private void checkDistinct(int COUNT, BytesInput bytes, ValuesReader cr, String prefix) throws IOException { - cr.initFromPage(COUNT, bytes.toByteArray(), 0); - for (int i = 0; i < COUNT; i++) { - Assert.assertEquals(prefix + i, cr.readBytes().toStringUsingUTF8()); - } - } - - private void checkRepeated(int COUNT, BytesInput bytes, ValuesReader cr, String prefix) throws IOException { - cr.initFromPage(COUNT, bytes.toByteArray(), 0); - for (int i = 0; i < COUNT; i++) { - Assert.assertEquals(prefix + i % 10, cr.readBytes().toStringUsingUTF8()); - } - } - - private void writeDistinct(int COUNT, ValuesWriter cw, String prefix) { - for (int i = 0; i < COUNT; i++) { - cw.writeBytes(Binary.fromString(prefix + i)); - } - } - - private void writeRepeated(int COUNT, ValuesWriter cw, String prefix) { - for (int i = 0; i < COUNT; i++) { - cw.writeBytes(Binary.fromString(prefix + i % 10)); - } - } - - private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw, String prefix) { - Binary reused = Binary.fromString(prefix + "0"); - for (int i = 0; i < COUNT; i++) { - Binary content = Binary.fromString(prefix + i % 10); - System.arraycopy(content.getBytes(), 0, reused.getBytes(), 0, reused.length()); - cw.writeBytes(reused); - } - } - - private BytesInput getBytesAndCheckEncoding(ValuesWriter cw, Encoding encoding) - throws IOException { - BytesInput bytes = BytesInput.copy(cw.getBytes()); - assertEquals(encoding, cw.getEncoding()); - cw.reset(); - return bytes; - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/rle/RunLengthBitPackingHybridIntegrationTest.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/rle/RunLengthBitPackingHybridIntegrationTest.java b/parquet-column/src/test/java/parquet/column/values/rle/RunLengthBitPackingHybridIntegrationTest.java deleted file mode 100644 index 80a7b0a..0000000 --- a/parquet-column/src/test/java/parquet/column/values/rle/RunLengthBitPackingHybridIntegrationTest.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.rle; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; - -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -/** - * @author Alex Levenson - */ -public class RunLengthBitPackingHybridIntegrationTest { - - @Test - public void integrationTest() throws Exception { - for (int i = 0; i <= 32; i++) { - doIntegrationTest(i); - } - } - - private void doIntegrationTest(int bitWidth) throws Exception { - long modValue = 1L << bitWidth; - - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, 1000, 64000); - int numValues = 0; - - for (int i = 0; i < 100; i++) { - encoder.writeInt((int) (i % modValue)); - } - numValues += 100; - - for (int i = 0; i < 100; i++) { - encoder.writeInt((int) (77 % modValue)); - } - numValues += 100; - - for (int i = 0; i < 100; i++) { - encoder.writeInt((int) (88 % modValue)); - } - numValues += 100; - - for (int i = 0; i < 1000; i++) { - encoder.writeInt((int) (i % modValue)); - encoder.writeInt((int) (i % modValue)); - encoder.writeInt((int) (i % modValue)); - } - numValues += 3000; - - for (int i = 0; i < 1000; i++) { - encoder.writeInt((int) (17 % modValue)); - } - numValues += 1000; - - byte[] encodedBytes = encoder.toBytes().toByteArray(); - ByteArrayInputStream in = new ByteArrayInputStream(encodedBytes); - - RunLengthBitPackingHybridDecoder decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in); - - for (int i = 0; i < 100; i++) { - assertEquals(i % modValue, decoder.readInt()); - } - - for (int i = 0; i < 100; i++) { - assertEquals(77 % modValue, decoder.readInt()); - } - - for (int i = 0; i < 100; i++) { - assertEquals(88 % modValue, decoder.readInt()); - } - - for (int i = 0; i < 1000; i++) { - assertEquals(i % modValue, decoder.readInt()); - assertEquals(i % modValue, decoder.readInt()); - assertEquals(i % modValue, decoder.readInt()); - } - - for (int i = 0; i < 1000; i++) { - assertEquals(17 % modValue, decoder.readInt()); - } - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/column/values/rle/TestRunLengthBitPackingHybridEncoder.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/column/values/rle/TestRunLengthBitPackingHybridEncoder.java b/parquet-column/src/test/java/parquet/column/values/rle/TestRunLengthBitPackingHybridEncoder.java deleted file mode 100644 index 82f8e55..0000000 --- a/parquet-column/src/test/java/parquet/column/values/rle/TestRunLengthBitPackingHybridEncoder.java +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.column.values.rle; - -import static org.junit.Assert.assertEquals; - -import java.io.ByteArrayInputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.junit.Test; - -import parquet.bytes.BytesUtils; -import parquet.column.values.bitpacking.BytePacker; -import parquet.column.values.bitpacking.Packer; - -/** - * @author Alex Levenson - */ -public class TestRunLengthBitPackingHybridEncoder { - - @Test - public void testRLEOnly() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(3, 5, 10); - for (int i = 0; i < 100; i++) { - encoder.writeInt(4); - } - for (int i = 0; i < 100; i++) { - encoder.writeInt(5); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = 100 << 1 = 200 - assertEquals(200, BytesUtils.readUnsignedVarInt(is)); - // payload = 4 - assertEquals(4, BytesUtils.readIntLittleEndianOnOneByte(is)); - - // header = 100 << 1 = 200 - assertEquals(200, BytesUtils.readUnsignedVarInt(is)); - // payload = 5 - assertEquals(5, BytesUtils.readIntLittleEndianOnOneByte(is)); - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testRepeatedZeros() throws Exception { - // previousValue is initialized to 0 - // make sure that repeated 0s at the beginning - // of the stream don't trip up the repeat count - - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(3, 5, 10); - for (int i = 0; i < 10; i++) { - encoder.writeInt(0); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = 10 << 1 = 20 - assertEquals(20, BytesUtils.readUnsignedVarInt(is)); - // payload = 4 - assertEquals(0, BytesUtils.readIntLittleEndianOnOneByte(is)); - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testBitWidthZero() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(0, 5, 10); - for (int i = 0; i < 10; i++) { - encoder.writeInt(0); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = 10 << 1 = 20 - assertEquals(20, BytesUtils.readUnsignedVarInt(is)); - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testBitPackingOnly() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(3, 5, 10); - - for (int i = 0; i < 100; i++) { - encoder.writeInt(i % 3); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = ((104/8) << 1) | 1 = 27 - assertEquals(27, BytesUtils.readUnsignedVarInt(is)); - - List values = unpack(3, 104, is); - - for (int i = 0; i < 100; i++) { - assertEquals(i % 3, (int) values.get(i)); - } - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testBitPackingOverflow() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(3, 5, 10); - - for (int i = 0; i < 1000; i++) { - encoder.writeInt(i % 3); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // 504 is the max number of values in a bit packed run - // that still has a header of 1 byte - // header = ((504/8) << 1) | 1 = 127 - assertEquals(127, BytesUtils.readUnsignedVarInt(is)); - List values = unpack(3, 504, is); - - for (int i = 0; i < 504; i++) { - assertEquals(i % 3, (int) values.get(i)); - } - - // there should now be 496 values in another bit-packed run - // header = ((496/8) << 1) | 1 = 125 - assertEquals(125, BytesUtils.readUnsignedVarInt(is)); - values = unpack(3, 496, is); - for (int i = 0; i < 496; i++) { - assertEquals((i + 504) % 3, (int) values.get(i)); - } - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testTransitionFromBitPackingToRle() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(3, 5, 10); - - // 5 obviously bit-packed values - encoder.writeInt(0); - encoder.writeInt(1); - encoder.writeInt(0); - encoder.writeInt(1); - encoder.writeInt(0); - - // three repeated values, that ought to be bit-packed as well - encoder.writeInt(2); - encoder.writeInt(2); - encoder.writeInt(2); - - // lots more repeated values, that should be rle-encoded - for (int i = 0; i < 100; i++) { - encoder.writeInt(2); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = ((8/8) << 1) | 1 = 3 - assertEquals(3, BytesUtils.readUnsignedVarInt(is)); - - List values = unpack(3, 8, is); - assertEquals(Arrays.asList(0, 1, 0, 1, 0, 2, 2, 2), values); - - // header = 100 << 1 = 200 - assertEquals(200, BytesUtils.readUnsignedVarInt(is)); - // payload = 2 - assertEquals(2, BytesUtils.readIntLittleEndianOnOneByte(is)); - - // end of stream - assertEquals(-1, is.read()); - } - - @Test - public void testPaddingZerosOnUnfinishedBitPackedRuns() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(5, 5, 10); - for (int i = 0; i < 9; i++) { - encoder.writeInt(i+1); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = ((16/8) << 1) | 1 = 5 - assertEquals(5, BytesUtils.readUnsignedVarInt(is)); - - List values = unpack(5, 16, is); - - assertEquals(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0), values); - - assertEquals(-1, is.read()); - } - - @Test - public void testSwitchingModes() throws Exception { - RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(9, 100, 1000); - - // rle first - for (int i = 0; i < 25; i++) { - encoder.writeInt(17); - } - - // bit-packing - for (int i = 0; i < 7; i++) { - encoder.writeInt(7); - } - - encoder.writeInt(8); - encoder.writeInt(9); - encoder.writeInt(10); - - // bit-packing followed by rle - for (int i = 0; i < 25; i++) { - encoder.writeInt(6); - } - - // followed by a different rle - for (int i = 0; i < 8; i++) { - encoder.writeInt(5); - } - - ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray()); - - // header = 25 << 1 = 50 - assertEquals(50, BytesUtils.readUnsignedVarInt(is)); - // payload = 17, stored in 2 bytes - assertEquals(17, BytesUtils.readIntLittleEndianOnTwoBytes(is)); - - // header = ((16/8) << 1) | 1 = 5 - assertEquals(5, BytesUtils.readUnsignedVarInt(is)); - List values = unpack(9, 16, is); - int v = 0; - for (int i = 0; i < 7; i++) { - assertEquals(7, (int) values.get(v)); - v++; - } - - assertEquals(8, (int) values.get(v++)); - assertEquals(9, (int) values.get(v++)); - assertEquals(10, (int) values.get(v++)); - - for (int i = 0; i < 6; i++) { - assertEquals(6, (int) values.get(v)); - v++; - } - - // header = 19 << 1 = 38 - assertEquals(38, BytesUtils.readUnsignedVarInt(is)); - // payload = 6, stored in 2 bytes - assertEquals(6, BytesUtils.readIntLittleEndianOnTwoBytes(is)); - - // header = 8 << 1 = 16 - assertEquals(16, BytesUtils.readUnsignedVarInt(is)); - // payload = 5, stored in 2 bytes - assertEquals(5, BytesUtils.readIntLittleEndianOnTwoBytes(is)); - - // end of stream - assertEquals(-1, is.read()); - } - - - @Test - public void testGroupBoundary() throws Exception { - byte[] bytes = new byte[2]; - // Create an RLE byte stream that has 3 values (1 literal group) with - // bit width 2. - bytes[0] = (1 << 1 )| 1; - bytes[1] = (1 << 0) | (2 << 2) | (3 << 4); - ByteArrayInputStream stream = new ByteArrayInputStream(bytes); - RunLengthBitPackingHybridDecoder decoder = new RunLengthBitPackingHybridDecoder(2, stream); - assertEquals(decoder.readInt(), 1); - assertEquals(decoder.readInt(), 2); - assertEquals(decoder.readInt(), 3); - assertEquals(stream.available(), 0); - } - - private static List unpack(int bitWidth, int numValues, ByteArrayInputStream is) - throws Exception { - - BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth); - int[] unpacked = new int[8]; - byte[] next8Values = new byte[bitWidth]; - - List values = new ArrayList(numValues); - - while(values.size() < numValues) { - for (int i = 0; i < bitWidth; i++) { - next8Values[i] = (byte) is.read(); - } - - packer.unpack8Values(next8Values, 0, unpacked, 0); - - for (int v = 0; v < 8; v++) { - values.add(unpacked[v]); - } - } - - return values; - } - -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java b/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java deleted file mode 100644 index d182042..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -public class DummyUdp extends UserDefinedPredicate { - - @Override - public boolean keep(Integer value) { - return false; - } - - @Override - public boolean canDrop(Statistics statistics) { - return false; - } - - @Override - public boolean inverseCanDrop(Statistics statistics) { - return false; - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java deleted file mode 100644 index 5e8d197..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; - -import org.junit.Test; - -import parquet.hadoop.metadata.ColumnPath; -import parquet.filter2.predicate.Operators.And; -import parquet.filter2.predicate.Operators.BinaryColumn; -import parquet.filter2.predicate.Operators.DoubleColumn; -import parquet.filter2.predicate.Operators.Eq; -import parquet.filter2.predicate.Operators.Gt; -import parquet.filter2.predicate.Operators.IntColumn; -import parquet.filter2.predicate.Operators.LongColumn; -import parquet.filter2.predicate.Operators.Not; -import parquet.filter2.predicate.Operators.Or; -import parquet.filter2.predicate.Operators.UserDefined; -import parquet.filter2.predicate.Operators.UserDefinedByClass; -import parquet.io.api.Binary; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static parquet.filter2.predicate.FilterApi.and; -import static parquet.filter2.predicate.FilterApi.binaryColumn; -import static parquet.filter2.predicate.FilterApi.doubleColumn; -import static parquet.filter2.predicate.FilterApi.eq; -import static parquet.filter2.predicate.FilterApi.gt; -import static parquet.filter2.predicate.FilterApi.intColumn; -import static parquet.filter2.predicate.FilterApi.longColumn; -import static parquet.filter2.predicate.FilterApi.not; -import static parquet.filter2.predicate.FilterApi.notEq; -import static parquet.filter2.predicate.FilterApi.or; -import static parquet.filter2.predicate.FilterApi.userDefined; -import static parquet.filter2.predicate.Operators.NotEq; - -public class TestFilterApiMethods { - - private static final IntColumn intColumn = intColumn("a.b.c"); - private static final LongColumn longColumn = longColumn("a.b.l"); - private static final DoubleColumn doubleColumn = doubleColumn("x.y.z"); - private static final BinaryColumn binColumn = binaryColumn("a.string.column"); - - private static final FilterPredicate predicate = - and(not(or(eq(intColumn, 7), notEq(intColumn, 17))), gt(doubleColumn, 100.0)); - - @Test - public void testFilterPredicateCreation() { - FilterPredicate outerAnd = predicate; - - assertTrue(outerAnd instanceof And); - - FilterPredicate not = ((And) outerAnd).getLeft(); - FilterPredicate gt = ((And) outerAnd).getRight(); - assertTrue(not instanceof Not); - - FilterPredicate or = ((Not) not).getPredicate(); - assertTrue(or instanceof Or); - - FilterPredicate leftEq = ((Or) or).getLeft(); - FilterPredicate rightNotEq = ((Or) or).getRight(); - assertTrue(leftEq instanceof Eq); - assertTrue(rightNotEq instanceof NotEq); - assertEquals(7, ((Eq) leftEq).getValue()); - assertEquals(17, ((NotEq) rightNotEq).getValue()); - assertEquals(ColumnPath.get("a", "b", "c"), ((Eq) leftEq).getColumn().getColumnPath()); - assertEquals(ColumnPath.get("a", "b", "c"), ((NotEq) rightNotEq).getColumn().getColumnPath()); - - assertTrue(gt instanceof Gt); - assertEquals(100.0, ((Gt) gt).getValue()); - assertEquals(ColumnPath.get("x", "y", "z"), ((Gt) gt).getColumn().getColumnPath()); - } - - @Test - public void testToString() { - FilterPredicate pred = or(predicate, notEq(binColumn, Binary.fromString("foobarbaz"))); - assertEquals("or(and(not(or(eq(a.b.c, 7), noteq(a.b.c, 17))), gt(x.y.z, 100.0)), " - + "noteq(a.string.column, Binary{\"foobarbaz\"}))", - pred.toString()); - } - - @Test - public void testUdp() { - FilterPredicate predicate = or(eq(doubleColumn, 12.0), userDefined(intColumn, DummyUdp.class)); - assertTrue(predicate instanceof Or); - FilterPredicate ud = ((Or) predicate).getRight(); - assertTrue(ud instanceof UserDefinedByClass); - assertEquals(DummyUdp.class, ((UserDefinedByClass) ud).getUserDefinedPredicateClass()); - assertTrue(((UserDefined) ud).getUserDefinedPredicate() instanceof DummyUdp); - } - - @Test - public void testSerializable() throws Exception { - BinaryColumn binary = binaryColumn("foo"); - FilterPredicate p = and(or(and(userDefined(intColumn, DummyUdp.class), predicate), eq(binary, Binary.fromString("hi"))), userDefined(longColumn, new IsMultipleOf(7))); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ObjectOutputStream oos = new ObjectOutputStream(baos); - oos.writeObject(p); - oos.close(); - - ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray())); - FilterPredicate read = (FilterPredicate) is.readObject(); - assertEquals(p, read); - } - - public static class IsMultipleOf extends UserDefinedPredicate implements Serializable { - - private long of; - - public IsMultipleOf(long of) { - this.of = of; - } - - @Override - public boolean keep(Long value) { - if (value == null) { - return false; - } - return value % of == 0; - } - - @Override - public boolean canDrop(Statistics statistics) { - return false; - } - - @Override - public boolean inverseCanDrop(Statistics statistics) { - return false; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - IsMultipleOf that = (IsMultipleOf) o; - return this.of == that.of; - } - - @Override - public int hashCode() { - return new Long(of).hashCode(); - } - - @Override - public String toString() { - return "IsMultipleOf(" + of + ")"; - } - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java deleted file mode 100644 index fca111f..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -import org.junit.Test; - -import parquet.filter2.predicate.Operators.DoubleColumn; -import parquet.filter2.predicate.Operators.IntColumn; -import parquet.filter2.predicate.Operators.LogicalNotUserDefined; -import parquet.filter2.predicate.Operators.UserDefined; - -import static org.junit.Assert.assertEquals; -import static parquet.filter2.predicate.FilterApi.and; -import static parquet.filter2.predicate.FilterApi.doubleColumn; -import static parquet.filter2.predicate.FilterApi.eq; -import static parquet.filter2.predicate.FilterApi.gt; -import static parquet.filter2.predicate.FilterApi.gtEq; -import static parquet.filter2.predicate.FilterApi.intColumn; -import static parquet.filter2.predicate.FilterApi.lt; -import static parquet.filter2.predicate.FilterApi.ltEq; -import static parquet.filter2.predicate.FilterApi.not; -import static parquet.filter2.predicate.FilterApi.notEq; -import static parquet.filter2.predicate.FilterApi.or; -import static parquet.filter2.predicate.FilterApi.userDefined; -import static parquet.filter2.predicate.LogicalInverseRewriter.rewrite; - -public class TestLogicalInverseRewriter { - private static final IntColumn intColumn = intColumn("a.b.c"); - private static final DoubleColumn doubleColumn = doubleColumn("a.b.c"); - - private static final FilterPredicate complex = - and( - not( - or(ltEq(doubleColumn, 12.0), - and( - not(or(eq(intColumn, 7), notEq(intColumn, 17))), - userDefined(intColumn, DummyUdp.class)))), - or(gt(doubleColumn, 100.0), not(gtEq(intColumn, 77)))); - - private static final FilterPredicate complexCollapsed = - and( - and(gt(doubleColumn, 12.0), - or( - or(eq(intColumn, 7), notEq(intColumn, 17)), - new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class)))), - or(gt(doubleColumn, 100.0), lt(intColumn, 77))); - - private static void assertNoOp(FilterPredicate p) { - assertEquals(p, rewrite(p)); - } - - @Test - public void testBaseCases() { - UserDefined ud = userDefined(intColumn, DummyUdp.class); - - assertNoOp(eq(intColumn, 17)); - assertNoOp(notEq(intColumn, 17)); - assertNoOp(lt(intColumn, 17)); - assertNoOp(ltEq(intColumn, 17)); - assertNoOp(gt(intColumn, 17)); - assertNoOp(gtEq(intColumn, 17)); - assertNoOp(and(eq(intColumn, 17), eq(doubleColumn, 12.0))); - assertNoOp(or(eq(intColumn, 17), eq(doubleColumn, 12.0))); - assertNoOp(ud); - - assertEquals(notEq(intColumn, 17), rewrite(not(eq(intColumn, 17)))); - assertEquals(eq(intColumn, 17), rewrite(not(notEq(intColumn, 17)))); - assertEquals(gtEq(intColumn, 17), rewrite(not(lt(intColumn, 17)))); - assertEquals(gt(intColumn, 17), rewrite(not(ltEq(intColumn, 17)))); - assertEquals(ltEq(intColumn, 17), rewrite(not(gt(intColumn, 17)))); - assertEquals(lt(intColumn, 17), rewrite(not(gtEq(intColumn, 17)))); - assertEquals(new LogicalNotUserDefined(ud), rewrite(not(ud))); - - FilterPredicate notedAnd = not(and(eq(intColumn, 17), eq(doubleColumn, 12.0))); - FilterPredicate distributedAnd = or(notEq(intColumn, 17), notEq(doubleColumn, 12.0)); - assertEquals(distributedAnd, rewrite(notedAnd)); - - FilterPredicate andWithNots = and(not(gtEq(intColumn, 17)), lt(intColumn, 7)); - FilterPredicate andWithoutNots = and(lt(intColumn, 17), lt(intColumn, 7)); - assertEquals(andWithoutNots, rewrite(andWithNots)); - } - - @Test - public void testComplex() { - assertEquals(complexCollapsed, rewrite(complex)); - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java deleted file mode 100644 index 9baa786..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -import org.junit.Test; - -import parquet.filter2.predicate.Operators.DoubleColumn; -import parquet.filter2.predicate.Operators.IntColumn; -import parquet.filter2.predicate.Operators.LogicalNotUserDefined; -import parquet.filter2.predicate.Operators.UserDefined; - -import static org.junit.Assert.assertEquals; -import static parquet.filter2.predicate.FilterApi.and; -import static parquet.filter2.predicate.FilterApi.doubleColumn; -import static parquet.filter2.predicate.FilterApi.eq; -import static parquet.filter2.predicate.FilterApi.gt; -import static parquet.filter2.predicate.FilterApi.gtEq; -import static parquet.filter2.predicate.FilterApi.intColumn; -import static parquet.filter2.predicate.FilterApi.lt; -import static parquet.filter2.predicate.FilterApi.ltEq; -import static parquet.filter2.predicate.FilterApi.not; -import static parquet.filter2.predicate.FilterApi.notEq; -import static parquet.filter2.predicate.FilterApi.or; -import static parquet.filter2.predicate.FilterApi.userDefined; -import static parquet.filter2.predicate.LogicalInverter.invert; - -public class TestLogicalInverter { - private static final IntColumn intColumn = intColumn("a.b.c"); - private static final DoubleColumn doubleColumn = doubleColumn("a.b.c"); - - private static final UserDefined ud = userDefined(intColumn, DummyUdp.class); - - private static final FilterPredicate complex = - and( - or(ltEq(doubleColumn, 12.0), - and( - not(or(eq(intColumn, 7), notEq(intColumn, 17))), - userDefined(intColumn, DummyUdp.class))), - or(gt(doubleColumn, 100.0), notEq(intColumn, 77))); - - private static final FilterPredicate complexInverse = - or( - and(gt(doubleColumn, 12.0), - or( - or(eq(intColumn, 7), notEq(intColumn, 17)), - new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class)))), - and(ltEq(doubleColumn, 100.0), eq(intColumn, 77))); - - @Test - public void testBaseCases() { - assertEquals(notEq(intColumn, 17), invert(eq(intColumn, 17))); - assertEquals(eq(intColumn, 17), invert(notEq(intColumn, 17))); - assertEquals(gtEq(intColumn, 17), invert(lt(intColumn, 17))); - assertEquals(gt(intColumn, 17), invert(ltEq(intColumn, 17))); - assertEquals(ltEq(intColumn, 17), invert(gt(intColumn, 17))); - assertEquals(lt(intColumn, 17), invert(gtEq(intColumn, 17))); - - FilterPredicate andPos = and(eq(intColumn, 17), eq(doubleColumn, 12.0)); - FilterPredicate andInv = or(notEq(intColumn, 17), notEq(doubleColumn, 12.0)); - assertEquals(andInv, invert(andPos)); - - FilterPredicate orPos = or(eq(intColumn, 17), eq(doubleColumn, 12.0)); - FilterPredicate orInv = and(notEq(intColumn, 17), notEq(doubleColumn, 12.0)); - assertEquals(orPos, invert(orInv)); - - assertEquals(eq(intColumn, 17), invert(not(eq(intColumn, 17)))); - - UserDefined ud = userDefined(intColumn, DummyUdp.class); - assertEquals(new LogicalNotUserDefined(ud), invert(ud)); - assertEquals(ud, invert(not(ud))); - assertEquals(ud, invert(new LogicalNotUserDefined(ud))); - } - - @Test - public void testComplex() { - assertEquals(complexInverse, invert(complex)); - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java deleted file mode 100644 index 7be50c7..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -import org.junit.Test; - -import parquet.filter2.predicate.Operators.BinaryColumn; -import parquet.filter2.predicate.Operators.IntColumn; -import parquet.filter2.predicate.Operators.LongColumn; -import parquet.io.api.Binary; -import parquet.schema.MessageType; -import parquet.schema.MessageTypeParser; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static parquet.filter2.predicate.FilterApi.and; -import static parquet.filter2.predicate.FilterApi.binaryColumn; -import static parquet.filter2.predicate.FilterApi.eq; -import static parquet.filter2.predicate.FilterApi.gt; -import static parquet.filter2.predicate.FilterApi.intColumn; -import static parquet.filter2.predicate.FilterApi.longColumn; -import static parquet.filter2.predicate.FilterApi.ltEq; -import static parquet.filter2.predicate.FilterApi.not; -import static parquet.filter2.predicate.FilterApi.notEq; -import static parquet.filter2.predicate.FilterApi.or; -import static parquet.filter2.predicate.FilterApi.userDefined; -import static parquet.filter2.predicate.SchemaCompatibilityValidator.validate; - -public class TestSchemaCompatibilityValidator { - private static final BinaryColumn stringC = binaryColumn("c"); - private static final LongColumn longBar = longColumn("x.bar"); - private static final IntColumn intBar = intColumn("x.bar"); - private static final LongColumn lotsOfLongs = longColumn("lotsOfLongs"); - - private static final String schemaString = - "message Document {\n" - + " required int32 a;\n" - + " required binary b;\n" - + " required binary c (UTF8);\n" - + " required group x { required int32 bar; }\n" - + " repeated int64 lotsOfLongs;\n" - + "}\n"; - - private static final MessageType schema = MessageTypeParser.parseMessageType(schemaString); - - private static final FilterPredicate complexValid = - and( - or(ltEq(stringC, Binary.fromString("foo")), - and( - not(or(eq(intBar, 17), notEq(intBar, 17))), - userDefined(intBar, DummyUdp.class))), - or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); - - static class LongDummyUdp extends UserDefinedPredicate { - @Override - public boolean keep(Long value) { - return false; - } - - @Override - public boolean canDrop(Statistics statistics) { - return false; - } - - @Override - public boolean inverseCanDrop(Statistics statistics) { - return false; - } - } - - private static final FilterPredicate complexWrongType = - and( - or(ltEq(stringC, Binary.fromString("foo")), - and( - not(or(eq(longBar, 17L), notEq(longBar, 17L))), - userDefined(longBar, LongDummyUdp.class))), - or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); - - private static final FilterPredicate complexMixedType = - and( - or(ltEq(stringC, Binary.fromString("foo")), - and( - not(or(eq(intBar, 17), notEq(longBar, 17L))), - userDefined(longBar, LongDummyUdp.class))), - or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); - - @Test - public void testValidType() { - validate(complexValid, schema); - } - - @Test - public void testFindsInvalidTypes() { - try { - validate(complexWrongType, schema); - fail("this should throw"); - } catch (IllegalArgumentException e) { - assertEquals("FilterPredicate column: x.bar's declared type (java.lang.Long) does not match the schema found in file metadata. " - + "Column x.bar is of type: FullTypeDescriptor(PrimitiveType: INT32, OriginalType: null)\n" - + "Valid types for this column are: [class java.lang.Integer]", e.getMessage()); - } - } - - @Test - public void testTwiceDeclaredColumn() { - validate(eq(stringC, Binary.fromString("larry")), schema); - - try { - validate(complexMixedType, schema); - fail("this should throw"); - } catch (IllegalArgumentException e) { - assertEquals("Column: x.bar was provided with different types in the same predicate. Found both: (class java.lang.Integer, class java.lang.Long)", e.getMessage()); - } - - } - - @Test - public void testRepeatedNotSupported() { - try { - validate(eq(lotsOfLongs, 10l), schema); - fail("this should throw"); - } catch (IllegalArgumentException e) { - assertEquals("FilterPredicates do not currently support repeated columns. Column lotsOfLongs is repeated.", e.getMessage()); - } - } -} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/parquet/filter2/predicate/TestValidTypeMap.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestValidTypeMap.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestValidTypeMap.java deleted file mode 100644 index 2daf143..0000000 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestValidTypeMap.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package parquet.filter2.predicate; - -import org.junit.Test; - -import parquet.hadoop.metadata.ColumnPath; -import parquet.filter2.predicate.Operators.BinaryColumn; -import parquet.filter2.predicate.Operators.BooleanColumn; -import parquet.filter2.predicate.Operators.Column; -import parquet.filter2.predicate.Operators.DoubleColumn; -import parquet.filter2.predicate.Operators.FloatColumn; -import parquet.filter2.predicate.Operators.IntColumn; -import parquet.filter2.predicate.Operators.LongColumn; -import parquet.schema.OriginalType; -import parquet.schema.PrimitiveType.PrimitiveTypeName; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static parquet.filter2.predicate.FilterApi.binaryColumn; -import static parquet.filter2.predicate.FilterApi.booleanColumn; -import static parquet.filter2.predicate.FilterApi.doubleColumn; -import static parquet.filter2.predicate.FilterApi.floatColumn; -import static parquet.filter2.predicate.FilterApi.intColumn; -import static parquet.filter2.predicate.FilterApi.longColumn; -import static parquet.filter2.predicate.ValidTypeMap.assertTypeValid; - -public class TestValidTypeMap { - public static IntColumn intColumn = intColumn("int.column"); - public static LongColumn longColumn = longColumn("long.column"); - public static FloatColumn floatColumn = floatColumn("float.column"); - public static DoubleColumn doubleColumn = doubleColumn("double.column"); - public static BooleanColumn booleanColumn = booleanColumn("boolean.column"); - public static BinaryColumn binaryColumn = binaryColumn("binary.column"); - - private static class InvalidColumnType implements Comparable { - @Override - public int compareTo(InvalidColumnType o) { - return 0; - } - } - - public static Column invalidColumn = - new Column(ColumnPath.get("invalid.column"), InvalidColumnType.class) { }; - - @Test - public void testValidTypes() { - assertTypeValid(intColumn, PrimitiveTypeName.INT32, null); - assertTypeValid(longColumn, PrimitiveTypeName.INT64, null); - assertTypeValid(floatColumn, PrimitiveTypeName.FLOAT, null); - assertTypeValid(doubleColumn, PrimitiveTypeName.DOUBLE, null); - assertTypeValid(booleanColumn, PrimitiveTypeName.BOOLEAN, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, OriginalType.UTF8); - assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, OriginalType.UTF8); - } - - @Test - public void testMismatchedTypes() { - try { - assertTypeValid(intColumn, PrimitiveTypeName.DOUBLE, null); - fail("This should throw!"); - } catch (IllegalArgumentException e) { - assertEquals("FilterPredicate column: int.column's declared type (java.lang.Integer) does not match the " - + "schema found in file metadata. Column int.column is of type: " - + "FullTypeDescriptor(PrimitiveType: DOUBLE, OriginalType: null)\n" - + "Valid types for this column are: [class java.lang.Double]", e.getMessage()); - } - } - - @Test - public void testUnsupportedType() { - try { - assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, null); - fail("This should throw!"); - } catch (IllegalArgumentException e) { - assertEquals("Column invalid.column was declared as type: " - + "parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which is not supported " - + "in FilterPredicates. Supported types for this column are: [class java.lang.Integer]", e.getMessage()); - } - - try { - assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, OriginalType.UTF8); - fail("This should throw!"); - } catch (IllegalArgumentException e) { - assertEquals("Column invalid.column was declared as type: " - + "parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which is not supported " - + "in FilterPredicates. There are no supported types for columns of FullTypeDescriptor(PrimitiveType: INT32, OriginalType: UTF8)", - e.getMessage()); - } - - } - -}