drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject [2/3] drill git commit: DRILL-2908: Fix Parquet for var length vectors where encoding changes across pages. Add unit tests. Add option to make parquet page size and dictionary page size configurable at session level. This closes #162
Date Mon, 28 Sep 2015 18:36:21 GMT
DRILL-2908: Fix Parquet for var length vectors where encoding changes across pages. Add unit
tests. Add option to make parquet page size and dictionary page size configurable at session
level. This closes #162


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/51179b92
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/51179b92
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/51179b92

Branch: refs/heads/master
Commit: 51179b92fa93a156decb910dcd7a32c762365f86
Parents: 657fe5b
Author: Parth Chandra <parthc@apache.org>
Authored: Tue Sep 15 15:27:17 2015 -0700
Committer: Parth Chandra <parthc@apache.org>
Committed: Mon Sep 28 11:35:28 2015 -0700

----------------------------------------------------------------------
 .../org/apache/drill/exec/ExecConstants.java    |   4 ++
 .../impl/conv/ConvertFromImpalaTimestamp.java   |  18 ++++--
 .../server/options/SystemOptionManager.java     |   2 +
 .../exec/store/parquet/ParquetFormatPlugin.java |   4 +-
 .../exec/store/parquet/ParquetGroupScan.java    |   1 +
 .../exec/store/parquet/ParquetRecordWriter.java |   6 +-
 .../columnreaders/ColumnReaderFactory.java      |   6 +-
 .../NullableFixedByteAlignedReaders.java        |   8 ++-
 .../ParquetFixedWidthDictionaryReaders.java     |  11 +++-
 .../java/org/apache/drill/DrillTestWrapper.java |   5 +-
 .../physical/impl/writer/TestParquetWriter.java |  58 +++++++++++++++++++
 .../resources/parquet/int96_dict_change.parquet | Bin 0 -> 16567 bytes
 .../resources/parquet/int96_impala_1.parquet    | Bin 0 -> 691 bytes
 .../parquet/part1/hive_all_types.parquet        | Bin 0 -> 2134 bytes
 14 files changed, 110 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 0f6a5bb..66f9f03 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -114,6 +114,10 @@ public interface ExecConstants {
   public static final OptionValidator OUTPUT_FORMAT_VALIDATOR = new StringValidator(OUTPUT_FORMAT_OPTION,
"parquet");
   public static final String PARQUET_BLOCK_SIZE = "store.parquet.block-size";
   public static final OptionValidator PARQUET_BLOCK_SIZE_VALIDATOR = new LongValidator(PARQUET_BLOCK_SIZE,
512*1024*1024);
+  public static final String PARQUET_PAGE_SIZE = "store.parquet.page-size";
+  public static final OptionValidator PARQUET_PAGE_SIZE_VALIDATOR = new LongValidator(PARQUET_PAGE_SIZE,
1024*1024);
+  public static final String PARQUET_DICT_PAGE_SIZE = "store.parquet.dictionary.page-size";
+  public static final OptionValidator PARQUET_DICT_PAGE_SIZE_VALIDATOR = new LongValidator(PARQUET_DICT_PAGE_SIZE,
1024*1024);
   public static final String PARQUET_WRITER_COMPRESSION_TYPE = "store.parquet.compression";
   public static final OptionValidator PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR = new EnumeratedStringValidator(
       PARQUET_WRITER_COMPRESSION_TYPE, "snappy", "snappy", "gzip", "none");

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/ConvertFromImpalaTimestamp.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/ConvertFromImpalaTimestamp.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/ConvertFromImpalaTimestamp.java
index e91c08e..a57eede 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/ConvertFromImpalaTimestamp.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/ConvertFromImpalaTimestamp.java
@@ -21,18 +21,20 @@ import org.apache.drill.exec.expr.DrillSimpleFunc;
 import org.apache.drill.exec.expr.annotations.FunctionTemplate;
 import org.apache.drill.exec.expr.annotations.Output;
 import org.apache.drill.exec.expr.annotations.Param;
-import org.apache.drill.exec.expr.holders.DateHolder;
+import org.apache.drill.exec.expr.annotations.Workspace;
 import org.apache.drill.exec.expr.holders.TimeStampHolder;
 import org.apache.drill.exec.expr.holders.VarBinaryHolder;
 
 public class ConvertFromImpalaTimestamp {
 
-  @FunctionTemplate(name = "convert_fromIMPALA_TIMESTAMP", scope = FunctionTemplate.FunctionScope.SIMPLE,
nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+
+  @FunctionTemplate(name = "convert_fromTIMESTAMP_IMPALA", scope = FunctionTemplate.FunctionScope.SIMPLE,
nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
   public static class ImpalaTimestampConvertFrom implements DrillSimpleFunc {
 
     @Param VarBinaryHolder in;
     @Output TimeStampHolder out;
 
+
     @Override
     public void setup() { }
 
@@ -43,8 +45,16 @@ public class ConvertFromImpalaTimestamp {
       in.buffer.readerIndex(in.start);
       long nanosOfDay = in.buffer.readLong();
       int julianDay = in.buffer.readInt();
-      // Need to subtract half of a day because julian days are recorded as starting at noon
-      out.value = org.joda.time.DateTimeUtils.fromJulianDay(julianDay - 0.5 + nanosOfDay
/ (24.0 * 3600 * 1000000000));
+      /* We use the same implementation as org.joda.time.DateTimeUtils.fromJulianDay but
avoid rounding errors
+         Note we need to subtract half of a day because julian days are recorded as starting
at noon.
+         From Joda :
+              public static final long fromJulianDay(double julianDay) {
+                484            double epochDay = julianDay - 2440587.5d;
+                485            return (long) (epochDay * 86400000d);
+                486        }
+      */
+      long dateTime = (julianDay - 2440588)*86400000L + (nanosOfDay / 1000000);
+      out.value = new org.joda.time.DateTime((long) dateTime, org.joda.time.chrono.JulianChronology.getInstance()).withZoneRetainFields(org.joda.time.DateTimeZone.UTC).getMillis();
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 118f7ad..5838bd1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -79,6 +79,8 @@ public class SystemOptionManager extends BaseOptionManager {
       ExecConstants.CAST_TO_NULLABLE_NUMERIC_OPTION,
       ExecConstants.OUTPUT_FORMAT_VALIDATOR,
       ExecConstants.PARQUET_BLOCK_SIZE_VALIDATOR,
+      ExecConstants.PARQUET_PAGE_SIZE_VALIDATOR,
+      ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR,
       ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR,
       ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING_VALIDATOR,
       ExecConstants.PARQUET_VECTOR_FILL_THRESHOLD_VALIDATOR,

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index eeb522a..3b7839a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -135,6 +135,8 @@ public class ParquetFormatPlugin implements FormatPlugin{
     options.put(FileSystem.FS_DEFAULT_NAME_KEY, ((FileSystemConfig)writer.getStorageConfig()).connection);
 
     options.put(ExecConstants.PARQUET_BLOCK_SIZE, context.getOptions().getOption(ExecConstants.PARQUET_BLOCK_SIZE).num_val.toString());
+    options.put(ExecConstants.PARQUET_PAGE_SIZE, context.getOptions().getOption(ExecConstants.PARQUET_PAGE_SIZE).num_val.toString());
+    options.put(ExecConstants.PARQUET_DICT_PAGE_SIZE, context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE).num_val.toString());
 
     options.put(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE,
         context.getOptions().getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).string_val);
@@ -234,4 +236,4 @@ public class ParquetFormatPlugin implements FormatPlugin{
       }
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index 00d36ff..7800721 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -303,6 +303,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
       return Types.optional(MinorType.FLOAT8);
     case BINARY:
     case FIXED_LEN_BYTE_ARRAY:
+    case INT96:
       return Types.optional(MinorType.VARBINARY);
     default:
       // Should never hit this

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
index f118535..6c15d3f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
@@ -81,8 +81,8 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
   private MessageType schema;
   private Map<String, String> extraMetaData = new HashMap<>();
   private int blockSize;
-  private int pageSize = 1024 * 1024;
-  private int dictionaryPageSize = pageSize;
+  private int pageSize;
+  private int dictionaryPageSize;
   private boolean enableDictionary = false;
   private CompressionCodecName codec = CompressionCodecName.SNAPPY;
   private WriterVersion writerVersion = WriterVersion.PARQUET_1_0;
@@ -121,6 +121,8 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
     conf = new Configuration();
     conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
     blockSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_BLOCK_SIZE));
+    pageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_PAGE_SIZE));
+    dictionaryPageSize= Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_DICT_PAGE_SIZE));
     String codecName = writerOptions.get(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).toLowerCase();
     switch(codecName) {
     case "snappy":

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
index 5292678..a13cde3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
@@ -211,7 +211,11 @@ public class ColumnReaderFactory {
     ConvertedType convertedType = schemaElement.getConverted_type();
 
     if (! columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
-      return new NullableFixedByteAlignedReaders.NullableFixedByteAlignedReader(parentReader,
allocateSize, columnDescriptor, columnChunkMetaData, fixedLength, valueVec, schemaElement);
+      if (columnDescriptor.getType() == PrimitiveType.PrimitiveTypeName.INT96) {
+        return new NullableFixedByteAlignedReaders.NullableFixedBinaryReader(parentReader,
allocateSize, columnDescriptor, columnChunkMetaData, true, (NullableVarBinaryVector) valueVec,
schemaElement);
+      }else{
+        return new NullableFixedByteAlignedReaders.NullableFixedByteAlignedReader(parentReader,
allocateSize, columnDescriptor, columnChunkMetaData, fixedLength, valueVec, schemaElement);
+      }
     } else {
       switch (columnDescriptor.getType()) {
         case INT32:

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableFixedByteAlignedReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableFixedByteAlignedReaders.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableFixedByteAlignedReaders.java
index 4d15ec6..f658518 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableFixedByteAlignedReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableFixedByteAlignedReaders.java
@@ -86,7 +86,6 @@ public class NullableFixedByteAlignedReaders {
     @Override
     protected void readField(long recordsToReadInThisPass) {
       this.bytebuf = pageReader.pageData;
-
       if (usingDictionary) {
         NullableVarBinaryVector.Mutator mutator =  castedVector.getMutator();
         Binary currDictValToWrite;
@@ -95,6 +94,11 @@ public class NullableFixedByteAlignedReaders {
           mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(),
0,
               currDictValToWrite.length());
         }
+        // Set the write Index. The next page that gets read might be a page that does not
use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent
class requires the
+        // writer index to be set correctly.
+        int writerIndex = castedBaseVector.getBuffer().writerIndex();
+        castedBaseVector.getBuffer().setIndex(0, writerIndex + (int)readLength);
       } else {
         super.readField(recordsToReadInThisPass);
         // TODO - replace this with fixed binary type in drill
@@ -397,5 +401,5 @@ public class NullableFixedByteAlignedReaders {
       nullableIntervalVector.getMutator().set(index, 1, bytebuf.getInt(start), bytebuf.getInt(start
+ 4), bytebuf.getInt(start + 8));
     }
   }
+}
 
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index ea75cad..4969dc0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -80,15 +80,22 @@ public class ParquetFixedWidthDictionaryReaders {
 
       recordsReadInThisIteration = Math.min(pageReader.currentPageCount
           - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
+      readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+      readLength = (int) Math.ceil(readLengthInBits / 8.0);
 
       if (usingDictionary) {
         VarBinaryVector.Mutator mutator =  castedVector.getMutator();
         Binary currDictValToWrite = null;
         for (int i = 0; i < recordsReadInThisIteration; i++){
           currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
-          mutator.setSafe(i, currDictValToWrite.toByteBuffer(), 0,
+          mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(),
0,
               currDictValToWrite.length());
         }
+        // Set the write Index. The next page that gets read might be a page that does not
use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent
class requires the
+        // writer index to be set correctly.
+        int writerIndex = castedVector.getBuffer().writerIndex();
+        castedVector.getBuffer().setIndex(0, writerIndex + (int)readLength);
       } else {
         super.readField(recordsToReadInThisPass);
       }
@@ -97,7 +104,7 @@ public class ParquetFixedWidthDictionaryReaders {
       // now we need to write the lengths of each value
       int byteLength = dataTypeLengthInBits / 8;
       for (int i = 0; i < recordsToReadInThisPass; i++) {
-        castedVector.getMutator().setValueLengthSafe(i, byteLength);
+        castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
index 59cbb15..60116e2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
@@ -32,6 +32,7 @@ import org.apache.drill.exec.record.VectorWrapper;
 import org.apache.drill.exec.rpc.user.QueryDataBatch;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.hadoop.io.Text;
+import org.codehaus.jackson.node.BinaryNode;
 
 import java.io.UnsupportedEncodingException;
 import java.lang.reflect.Array;
@@ -253,7 +254,9 @@ public class DrillTestWrapper {
               }
             }
             else if (obj instanceof byte[]) {
-              obj = new String((byte[]) obj, "UTF-8");
+              // Borrowed from parquet-tools, allows printing of varbinary columns as readable
strings
+              // and also matches the data output by 'parquet-tools cat'
+              obj= new BinaryNode((byte[]) obj).asText();
             }
           }
           combinedVectors.get(field).add(obj);

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 5dc7bec..b8dabb6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -599,4 +599,62 @@ public class TestParquetWriter extends BaseTestQuery {
       deleteTableIfExists(outputFile);
     }
   }
+
+  /*
+  Test the reading of an int96 field. Impala encodes timestamps as int96 fields
+   */
+  @Test
+  public void testImpalaParquetInt96() throws Exception {
+    compareParquetReadersColumnar("field_impala_ts", "cp.`parquet/int96_impala_1.parquet`");
+  }
+
+  /*
+  Test the reading of a binary field where data is in dicationary _and_ non-dictionary encoded
pages
+   */
+  @Test
+  public void testImpalaParquetVarBinary_DictChange() throws Exception {
+    compareParquetReadersColumnar("field_impala_ts", "cp.`parquet/int96_dict_change.parquet`");
+  }
+
+  /*
+     Test the conversion from int96 to impala timestamp
+   */
+  @Test
+  public void testImpalaParquetTimestampAsInt96() throws Exception {
+    compareParquetReadersColumnar("convert_from(field_impala_ts, 'TIMESTAMP_IMPALA')", "cp.`parquet/int96_impala_1.parquet`");
+  }
+
+  /*
+    Test a file with partitions and an int96 column. (Data generated using Hive)
+   */
+  @Test
+  public void testImpalaParquetInt96Partitioned() throws Exception {
+    compareParquetReadersColumnar("timestamp_field", "cp.`parquet/part1/hive_all_types.parquet`");
+  }
+
+  /*
+  Test the conversion from int96 to impala timestamp with hive data including nulls. Validate
against old reader
+  */
+  @Test
+  public void testHiveParquetTimestampAsInt96_compare() throws Exception {
+    compareParquetReadersColumnar("convert_from(timestamp_field, 'TIMESTAMP_IMPALA')", "cp.`parquet/part1/hive_all_types.parquet`");
+  }
+
+  /*
+  Test the conversion from int96 to impala timestamp with hive data including nulls. Validate
against expected values
+  */
+  @Test
+  @Ignore("relies on particular time zone")
+  public void testHiveParquetTimestampAsInt96_basic() throws Exception {
+    final String q = "SELECT cast(convert_from(timestamp_field, 'TIMESTAMP_IMPALA') as varchar(19))
 as timestamp_field "
+            + "from cp.`parquet/part1/hive_all_types.parquet` ";
+
+    testBuilder()
+            .unOrdered()
+            .sqlQuery(q)
+            .baselineColumns("timestamp_field")
+            .baselineValues("2013-07-05 17:01:00")
+            .baselineValues((Object)null)
+            .go();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/test/resources/parquet/int96_dict_change.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/int96_dict_change.parquet b/exec/java-exec/src/test/resources/parquet/int96_dict_change.parquet
new file mode 100644
index 0000000..4186daa
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/int96_dict_change.parquet
differ

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/test/resources/parquet/int96_impala_1.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/int96_impala_1.parquet b/exec/java-exec/src/test/resources/parquet/int96_impala_1.parquet
new file mode 100644
index 0000000..2ce9d09
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/int96_impala_1.parquet
differ

http://git-wip-us.apache.org/repos/asf/drill/blob/51179b92/exec/java-exec/src/test/resources/parquet/part1/hive_all_types.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/part1/hive_all_types.parquet b/exec/java-exec/src/test/resources/parquet/part1/hive_all_types.parquet
new file mode 100644
index 0000000..7a8ff37
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/part1/hive_all_types.parquet
differ


Mime
View raw message