drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sudhe...@apache.org
Subject [4/4] drill git commit: DRILL-4764: Parquet file with INT_16, etc. logical types not supported by simple SELECT
Date Mon, 30 Jan 2017 19:42:41 GMT
DRILL-4764: Parquet file with INT_16, etc. logical types not supported by simple SELECT

closes #673


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/60624af2
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/60624af2
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/60624af2

Branch: refs/heads/master
Commit: 60624af225f90992a15a707e1650e41ccecf5a53
Parents: 5c3924c
Author: Serhii-Harnyk <serhii.harnyk@gmail.com>
Authored: Thu Nov 24 13:24:03 2016 +0000
Committer: Sudheesh Katkam <sudheesh@apache.org>
Committed: Mon Jan 30 10:09:39 2017 -0800

----------------------------------------------------------------------
 .../columnreaders/ColumnReaderFactory.java      |  11 +++
 .../ParquetFixedWidthDictionaryReaders.java     |  72 +++++++++++++++++++
 .../ParquetToDrillTypeConverter.java            |   9 +++
 .../store/parquet2/TestDrillParquetReader.java  |  14 ++++
 .../test/resources/parquet/uint_types.parquet   | Bin 0 -> 1727 bytes
 5 files changed, 106 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
index 662d5c9..495f70b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
@@ -48,6 +48,8 @@ import org.apache.drill.exec.vector.NullableVarBinaryVector;
 import org.apache.drill.exec.vector.NullableVarCharVector;
 import org.apache.drill.exec.vector.TimeStampVector;
 import org.apache.drill.exec.vector.TimeVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.UInt8Vector;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.VarBinaryVector;
 import org.apache.drill.exec.vector.VarCharVector;
@@ -121,6 +123,13 @@ public class ColumnReaderFactory {
                   return new ParquetFixedWidthDictionaryReaders.DictionaryDecimal9Reader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (Decimal9Vector) v, schemaElement);
                 case TIME_MILLIS:
                   return new ParquetFixedWidthDictionaryReaders.DictionaryTimeReader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (TimeVector) v, schemaElement);
+                case INT_8:
+                case INT_16:
+                  return new ParquetFixedWidthDictionaryReaders.DictionaryIntReader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (IntVector) v, schemaElement);
+                case UINT_8:
+                case UINT_16:
+                case UINT_32:
+                  return new ParquetFixedWidthDictionaryReaders.DictionaryUInt4Reader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt4Vector) v, schemaElement);
                 default:
                   throw new ExecutionSetupException("Unsupported dictionary converted type
" + convertedType + " for primitive type INT32");
               }
@@ -129,6 +138,8 @@ public class ColumnReaderFactory {
                 return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement);
               }
               switch (convertedType) {
+                case UINT_64:
+                  return new ParquetFixedWidthDictionaryReaders.DictionaryUInt8Reader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt8Vector) v, schemaElement);
                 case DECIMAL:
                   return new ParquetFixedWidthDictionaryReaders.DictionaryDecimal18Reader(recordReader,
allocateSize, descriptor, columnChunkMetaData, fixedLength, (Decimal18Vector) v, schemaElement);
                 case TIMESTAMP_MILLIS:

http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index d7b6fbb..53a68ab 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -26,6 +26,8 @@ import org.apache.drill.exec.vector.Float8Vector;
 import org.apache.drill.exec.vector.IntVector;
 import org.apache.drill.exec.vector.TimeStampVector;
 import org.apache.drill.exec.vector.TimeVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.UInt8Vector;
 import org.apache.drill.exec.vector.VarBinaryVector;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.format.SchemaElement;
@@ -56,6 +58,41 @@ public class ParquetFixedWidthDictionaryReaders {
     }
   }
 
+  /**
+   * This class uses for reading unsigned integer fields.
+   */
+  static class DictionaryUInt4Reader extends FixedByteAlignedReader<UInt4Vector> {
+    DictionaryUInt4Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor
descriptor,
+                        ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, UInt4Vector
v,
+                        SchemaElement schemaElement) throws ExecutionSetupException {
+      super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v,
schemaElement);
+    }
+
+    // this method is called by its superclass during a read loop
+    @Override
+    protected void readField(long recordsToReadInThisPass) {
+
+      recordsReadInThisIteration = Math.min(pageReader.currentPageCount
+        - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
+
+      if (usingDictionary) {
+        UInt4Vector.Mutator mutator = valueVec.getMutator();
+        for (int i = 0; i < recordsReadInThisIteration; i++) {
+          mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readInteger());
+        }
+        // Set the write Index. The next page that gets read might be a page that does not
use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent
class requires the
+        // writer index to be set correctly.
+        readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+        readLength = (int) Math.ceil(readLengthInBits / 8.0);
+        int writerIndex = valueVec.getBuffer().writerIndex();
+        valueVec.getBuffer().setIndex(0, writerIndex + (int) readLength);
+      } else {
+        super.readField(recordsToReadInThisPass);
+      }
+    }
+  }
+
   static class DictionaryFixedBinaryReader extends FixedByteAlignedReader<VarBinaryVector>
{
     DictionaryFixedBinaryReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor
descriptor,
                         ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector
v,
@@ -174,6 +211,41 @@ public class ParquetFixedWidthDictionaryReaders {
     }
   }
 
+  /**
+   * This class uses for reading unsigned BigInt fields.
+   */
+  static class DictionaryUInt8Reader extends FixedByteAlignedReader<UInt8Vector> {
+    DictionaryUInt8Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor
descriptor,
+                           ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
UInt8Vector v,
+                           SchemaElement schemaElement) throws ExecutionSetupException {
+      super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v,
schemaElement);
+    }
+
+    // this method is called by its superclass during a read loop
+    @Override
+    protected void readField(long recordsToReadInThisPass) {
+
+      recordsReadInThisIteration = Math.min(pageReader.currentPageCount
+        - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
+
+      if (usingDictionary) {
+        UInt8Vector.Mutator mutator = valueVec.getMutator();
+        for (int i = 0; i < recordsReadInThisIteration; i++) {
+          mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
+        }
+        // Set the write Index. The next page that gets read might be a page that does not
use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent
class requires the
+        // writer index to be set correctly.
+        readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+        readLength = (int) Math.ceil(readLengthInBits / 8.0);
+        int writerIndex = valueVec.getBuffer().writerIndex();
+        valueVec.getBuffer().setIndex(0, writerIndex + (int) readLength);
+      } else {
+        super.readField(recordsToReadInThisPass);
+      }
+    }
+  }
+
   static class DictionaryDecimal18Reader extends FixedByteAlignedReader<Decimal18Vector>
{
     DictionaryDecimal18Reader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor
descriptor,
                            ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
Decimal18Vector v,

http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
index be27f3e..3f5f3b2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
@@ -61,6 +61,8 @@ public class ParquetToDrillTypeConverter {
           return (TypeProtos.MinorType.BIGINT);
         }
         switch(convertedType) {
+          case UINT_64:
+            return TypeProtos.MinorType.UINT8;
           case DECIMAL:
             ParquetReaderUtility.checkDecimalTypeEnabled(options);
             return TypeProtos.MinorType.DECIMAL18;
@@ -77,6 +79,13 @@ public class ParquetToDrillTypeConverter {
           return TypeProtos.MinorType.INT;
         }
         switch(convertedType) {
+          case UINT_8:
+          case UINT_16:
+          case UINT_32:
+            return TypeProtos.MinorType.UINT4;
+          case INT_8:
+          case INT_16:
+            return TypeProtos.MinorType.INT;
           case DECIMAL:
             ParquetReaderUtility.checkDecimalTypeEnabled(options);
             return TypeProtos.MinorType.DECIMAL9;

http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
index b18fd9d..477b825 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
@@ -84,4 +84,18 @@ public class TestDrillParquetReader extends BaseTestQuery {
       .sqlBaselineQuery("SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val
FROM cp.`parquet2/4349.csv.gz` WHERE columns[0] = 'b'")
       .go();
   }
+
+  @Test
+  public void testUnsignedAndSignedIntTypes() throws Exception {
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("select * from cp.`parquet/uint_types.parquet`")
+      .baselineColumns("uint8_field", "uint16_field", "uint32_field", "uint64_field", "int8_field",
"int16_field",
+        "required_uint8_field", "required_uint16_field", "required_uint32_field", "required_uint64_field",
+        "required_int8_field", "required_int16_field")
+      .baselineValues(255, 65535, 2147483647, 9223372036854775807L, 255, 65535, -1, -1, -1,
-1L, -2147483648, -2147483648)
+      .baselineValues(-1, -1, -1, -1L, -2147483648, -2147483648, 255, 65535, 2147483647,
9223372036854775807L, 255, 65535)
+      .baselineValues(null, null, null, null, null, null, 0, 0, 0, 0L, 0, 0)
+      .go();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/60624af2/exec/java-exec/src/test/resources/parquet/uint_types.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/uint_types.parquet b/exec/java-exec/src/test/resources/parquet/uint_types.parquet
new file mode 100644
index 0000000..62ea0279
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/uint_types.parquet
differ


Mime
View raw message