drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject [1/2] drill git commit: Fix for DRILL-4759: Drill throwing array index out of bound exception when reading a parquet file written by map reduce program
Date Wed, 20 Jul 2016 00:08:44 GMT
Repository: drill
Updated Branches:
  refs/heads/master 4f818d074 -> 34ca63ba1


Fix for DRILL-4759: Drill throwing array index out of bound exception when reading a parquet
file written by map reduce program

Added unit test case.

Updated fix


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e371e18b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e371e18b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e371e18b

Branch: refs/heads/master
Commit: e371e18b9bd720a862b55f9e2682b39e1f68ce97
Parents: 4f818d0
Author: Padma Penumarthy <ppenumarthy@PPENUMARTHY-E653-MPR13.local>
Authored: Tue Jul 5 11:28:11 2016 -0700
Committer: Parth Chandra <parthc@apache.org>
Committed: Tue Jul 19 10:13:25 2016 -0700

----------------------------------------------------------------------
 .../ParquetFixedWidthDictionaryReaders.java      |  18 +++++++++++++-----
 .../columnreaders/TestColumnReaderFactory.java   |  15 +++++++++++++++
 .../resources/parquet/bigIntDictionary.parquet   | Bin 0 -> 1918423 bytes
 3 files changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index 00bf5f0..d7b6fbb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -156,12 +156,20 @@ public class ParquetFixedWidthDictionaryReaders {
       recordsReadInThisIteration = Math.min(pageReader.currentPageCount
           - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
 
-      for (int i = 0; i < recordsReadInThisIteration; i++){
-        try {
-        valueVec.getMutator().setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
-        } catch ( Exception ex) {
-          throw ex;
+      if (usingDictionary) {
+        BigIntVector.Mutator mutator =  valueVec.getMutator();
+        for (int i = 0; i < recordsReadInThisIteration; i++){
+          mutator.setSafe(valuesReadInCurrentPass + i,  pageReader.dictionaryValueReader.readLong());
         }
+        // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent class requires the
+        // writer index to be set correctly.
+        readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+        readLength = (int) Math.ceil(readLengthInBits / 8.0);
+        int writerIndex = valueVec.getBuffer().writerIndex();
+        valueVec.getBuffer().setIndex(0, writerIndex + (int)readLength);
+      } else {
+        super.readField(recordsToReadInThisPass);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
index 4dff928..bfd894d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
@@ -94,4 +94,19 @@ public class TestColumnReaderFactory extends BaseTestQuery {
     // query parquet file. We shouldn't get any exception
     testNoResult("SELECT * FROM cp.`parquet/decimal_nodictionary.parquet`");
   }
+
+  /**
+   * check if BigInt is read correctly with dictionary encoding.
+   */
+  @Test
+  public void testBigIntWithDictionary() throws Exception {
+    String query = "select sum(ts) as total from cp.`parquet/bigIntDictionary.parquet`";
+
+    testBuilder()
+    .sqlQuery(query)
+    .ordered()
+    .baselineColumns("total")
+    .baselineValues(190928593476806865L)
+    .build().run();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
new file mode 100644
index 0000000..51c59cc
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet differ


Mime
View raw message