Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id E8740200C1B for ; Mon, 30 Jan 2017 20:42:40 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id E7386160B4D; Mon, 30 Jan 2017 19:42:40 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id EFCDA160B61 for ; Mon, 30 Jan 2017 20:42:39 +0100 (CET) Received: (qmail 50866 invoked by uid 500); 30 Jan 2017 19:42:39 -0000 Mailing-List: contact commits-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: commits@drill.apache.org Delivered-To: mailing list commits@drill.apache.org Received: (qmail 50763 invoked by uid 99); 30 Jan 2017 19:42:39 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Jan 2017 19:42:39 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DD260DFF88; Mon, 30 Jan 2017 19:42:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sudheesh@apache.org To: commits@drill.apache.org Date: Mon, 30 Jan 2017 19:42:41 -0000 Message-Id: <73e2a829189f42b792779b3f58d661ed@git.apache.org> In-Reply-To: <81ee6213cb14498b840d8aca172ff427@git.apache.org> References: <81ee6213cb14498b840d8aca172ff427@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [4/4] drill git commit: DRILL-4764: Parquet file with INT_16, etc. logical types not supported by simple SELECT archived-at: Mon, 30 Jan 2017 19:42:41 -0000 DRILL-4764: Parquet file with INT_16, etc. 
logical types not supported by simple SELECT closes #673 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/60624af2 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/60624af2 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/60624af2 Branch: refs/heads/master Commit: 60624af225f90992a15a707e1650e41ccecf5a53 Parents: 5c3924c Author: Serhii-Harnyk Authored: Thu Nov 24 13:24:03 2016 +0000 Committer: Sudheesh Katkam Committed: Mon Jan 30 10:09:39 2017 -0800 ---------------------------------------------------------------------- .../columnreaders/ColumnReaderFactory.java | 11 +++ .../ParquetFixedWidthDictionaryReaders.java | 72 +++++++++++++++++++ .../ParquetToDrillTypeConverter.java | 9 +++ .../store/parquet2/TestDrillParquetReader.java | 14 ++++ .../test/resources/parquet/uint_types.parquet | Bin 0 -> 1727 bytes 5 files changed, 106 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java index 662d5c9..495f70b 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java @@ -48,6 +48,8 @@ import org.apache.drill.exec.vector.NullableVarBinaryVector; import org.apache.drill.exec.vector.NullableVarCharVector; import org.apache.drill.exec.vector.TimeStampVector; import org.apache.drill.exec.vector.TimeVector; +import 
org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.UInt8Vector; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.VarBinaryVector; import org.apache.drill.exec.vector.VarCharVector; @@ -121,6 +123,13 @@ public class ColumnReaderFactory { return new ParquetFixedWidthDictionaryReaders.DictionaryDecimal9Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (Decimal9Vector) v, schemaElement); case TIME_MILLIS: return new ParquetFixedWidthDictionaryReaders.DictionaryTimeReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (TimeVector) v, schemaElement); + case INT_8: + case INT_16: + return new ParquetFixedWidthDictionaryReaders.DictionaryIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (IntVector) v, schemaElement); + case UINT_8: + case UINT_16: + case UINT_32: + return new ParquetFixedWidthDictionaryReaders.DictionaryUInt4Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt4Vector) v, schemaElement); default: throw new ExecutionSetupException("Unsupported dictionary converted type " + convertedType + " for primitive type INT32"); } @@ -129,6 +138,8 @@ public class ColumnReaderFactory { return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement); } switch (convertedType) { + case UINT_64: + return new ParquetFixedWidthDictionaryReaders.DictionaryUInt8Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt8Vector) v, schemaElement); case DECIMAL: return new ParquetFixedWidthDictionaryReaders.DictionaryDecimal18Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (Decimal18Vector) v, schemaElement); case TIMESTAMP_MILLIS: 
http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java index d7b6fbb..53a68ab 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java @@ -26,6 +26,8 @@ import org.apache.drill.exec.vector.Float8Vector; import org.apache.drill.exec.vector.IntVector; import org.apache.drill.exec.vector.TimeStampVector; import org.apache.drill.exec.vector.TimeVector; +import org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.UInt8Vector; import org.apache.drill.exec.vector.VarBinaryVector; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.format.SchemaElement; @@ -56,6 +58,41 @@ public class ParquetFixedWidthDictionaryReaders { } } + /** + * This class is used for reading unsigned integer fields. 
+ */ + static class DictionaryUInt4Reader extends FixedByteAlignedReader { + DictionaryUInt4Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, + ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, UInt4Vector v, + SchemaElement schemaElement) throws ExecutionSetupException { + super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement); + } + + // this method is called by its superclass during a read loop + @Override + protected void readField(long recordsToReadInThisPass) { + + recordsReadInThisIteration = Math.min(pageReader.currentPageCount + - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass); + + if (usingDictionary) { + UInt4Vector.Mutator mutator = valueVec.getMutator(); + for (int i = 0; i < recordsReadInThisIteration; i++) { + mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readInteger()); + } + // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding + // and we will go into the else condition below. The readField method of the parent class requires the + // writer index to be set correctly. + readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits; + readLength = (int) Math.ceil(readLengthInBits / 8.0); + int writerIndex = valueVec.getBuffer().writerIndex(); + valueVec.getBuffer().setIndex(0, writerIndex + (int) readLength); + } else { + super.readField(recordsToReadInThisPass); + } + } + } + static class DictionaryFixedBinaryReader extends FixedByteAlignedReader { DictionaryFixedBinaryReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector v, @@ -174,6 +211,41 @@ public class ParquetFixedWidthDictionaryReaders { } } + /** + * This class is used for reading unsigned BigInt fields. 
+ */ + static class DictionaryUInt8Reader extends FixedByteAlignedReader { + DictionaryUInt8Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, + ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, UInt8Vector v, + SchemaElement schemaElement) throws ExecutionSetupException { + super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement); + } + + // this method is called by its superclass during a read loop + @Override + protected void readField(long recordsToReadInThisPass) { + + recordsReadInThisIteration = Math.min(pageReader.currentPageCount + - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass); + + if (usingDictionary) { + UInt8Vector.Mutator mutator = valueVec.getMutator(); + for (int i = 0; i < recordsReadInThisIteration; i++) { + mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong()); + } + // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding + // and we will go into the else condition below. The readField method of the parent class requires the + // writer index to be set correctly. 
+ readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits; + readLength = (int) Math.ceil(readLengthInBits / 8.0); + int writerIndex = valueVec.getBuffer().writerIndex(); + valueVec.getBuffer().setIndex(0, writerIndex + (int) readLength); + } else { + super.readField(recordsToReadInThisPass); + } + } + } + static class DictionaryDecimal18Reader extends FixedByteAlignedReader { DictionaryDecimal18Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, Decimal18Vector v, http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java index be27f3e..3f5f3b2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java @@ -61,6 +61,8 @@ public class ParquetToDrillTypeConverter { return (TypeProtos.MinorType.BIGINT); } switch(convertedType) { + case UINT_64: + return TypeProtos.MinorType.UINT8; case DECIMAL: ParquetReaderUtility.checkDecimalTypeEnabled(options); return TypeProtos.MinorType.DECIMAL18; @@ -77,6 +79,13 @@ public class ParquetToDrillTypeConverter { return TypeProtos.MinorType.INT; } switch(convertedType) { + case UINT_8: + case UINT_16: + case UINT_32: + return TypeProtos.MinorType.UINT4; + case INT_8: + case INT_16: + return TypeProtos.MinorType.INT; case DECIMAL: ParquetReaderUtility.checkDecimalTypeEnabled(options); return TypeProtos.MinorType.DECIMAL9; 
http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java index b18fd9d..477b825 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java @@ -84,4 +84,18 @@ public class TestDrillParquetReader extends BaseTestQuery { .sqlBaselineQuery("SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz` WHERE columns[0] = 'b'") .go(); } + + @Test + public void testUnsignedAndSignedIntTypes() throws Exception { + testBuilder() + .unOrdered() + .sqlQuery("select * from cp.`parquet/uint_types.parquet`") + .baselineColumns("uint8_field", "uint16_field", "uint32_field", "uint64_field", "int8_field", "int16_field", + "required_uint8_field", "required_uint16_field", "required_uint32_field", "required_uint64_field", + "required_int8_field", "required_int16_field") + .baselineValues(255, 65535, 2147483647, 9223372036854775807L, 255, 65535, -1, -1, -1, -1L, -2147483648, -2147483648) + .baselineValues(-1, -1, -1, -1L, -2147483648, -2147483648, 255, 65535, 2147483647, 9223372036854775807L, 255, 65535) + .baselineValues(null, null, null, null, null, null, 0, 0, 0, 0L, 0, 0) + .go(); + } } http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/test/resources/parquet/uint_types.parquet ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/parquet/uint_types.parquet b/exec/java-exec/src/test/resources/parquet/uint_types.parquet new file mode 100644 index 
0000000..62ea0279 Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/uint_types.parquet differ