drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From adene...@apache.org
Subject drill git commit: DRILL-4349: parquet reader returns wrong results when reading a nullable column that starts with a large number of nulls (>30k)
Date Fri, 05 Feb 2016 01:29:01 GMT
Repository: drill
Updated Branches:
  refs/heads/master cb1a0236f -> ca53c2440


DRILL-4349: parquet reader returns wrong results when reading a nullable column that starts
with a large number of nulls (>30k)


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/ca53c244
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/ca53c244
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/ca53c244

Branch: refs/heads/master
Commit: ca53c2440fb33e31220d11aee297fee67fc6bd6a
Parents: cb1a023
Author: adeneche <adeneche@gmail.com>
Authored: Wed Feb 3 15:42:22 2016 -0800
Committer: adeneche <adeneche@gmail.com>
Committed: Thu Feb 4 15:41:43 2016 -0800

----------------------------------------------------------------------
 .../parquet/columnreaders/NullableColumnReader.java  |   2 +-
 .../exec/store/parquet2/TestDrillParquetReader.java  |  13 +++++++++++++
 .../src/test/resources/parquet2/4349.csv.gz          | Bin 0 -> 202 bytes
 3 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
index 4e52b70..2929eb2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
@@ -154,10 +154,10 @@ abstract class NullableColumnReader<V extends ValueVector> extends ColumnReader<
 
         writeCount += runLength;
         valuesReadInCurrentPass += runLength;
+        pageReader.readPosInBytes = readStartInBytes + readLength;
       }
 
       pageReader.valuesRead += recordsReadInThisIteration;
-      pageReader.readPosInBytes = readStartInBytes + readLength;
 
       totalValuesRead += runLength + nullRunLength;
 

http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
index 05ca7fc..b18fd9d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
@@ -71,4 +71,17 @@ public class TestDrillParquetReader extends BaseTestQuery {
   public void testOptionalDecimal38() throws Exception {
     testColumn("d38_opt");
   }
+
+  @Test
+  public void test4349() throws Exception {
+    // start by creating a parquet file from the input csv file
+    runSQL("CREATE TABLE dfs_test.tmp.`4349` AS SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz`");
+
+    // querying the parquet file should return the same results found in the csv file
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("SELECT * FROM dfs_test.tmp.`4349` WHERE id = 'b'")
+      .sqlBaselineQuery("SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz` WHERE columns[0] = 'b'")
+      .go();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet2/4349.csv.gz b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
new file mode 100644
index 0000000..0729b0c
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz differ


Mime
View raw message