parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject parquet-mr git commit: PARQUET-645: Fix null handling in DictionaryFilter.
Date Thu, 30 Jun 2016 16:47:53 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 1f470253c -> 9c40a7bb3


PARQUET-645: Fix null handling in DictionaryFilter.

This fixes how null is handled by `DictionaryFilter` for equals predicates. Null is never
in the dictionary and is encoded by the definition level, so the `DictionaryFilter` would
never find the value in the dictionary and would incorrectly filter row groups whenever the
filter was `col == null`.

Author: Ryan Blue <blue@apache.org>

Closes #348 from rdblue/PARQUET-645-fix-null-dictionary-filter and squashes the following
commits:

ae8dd41 [Ryan Blue] PARQUET-645: Fix null handling in DictionaryFilter.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9c40a7bb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9c40a7bb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9c40a7bb

Branch: refs/heads/master
Commit: 9c40a7bb3c9aca51d17490960c988dfb7b5acebb
Parents: 1f47025
Author: Ryan Blue <blue@apache.org>
Authored: Thu Jun 30 09:47:48 2016 -0700
Committer: Julien Le Dem <julien@dremio.com>
Committed: Thu Jun 30 09:47:48 2016 -0700

----------------------------------------------------------------------
 .../filter2/dictionarylevel/DictionaryFilter.java       | 12 ++++++++++++
 .../filter2/dictionarylevel/DictionaryFilterTest.java   |  6 ++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index 9b03f82..dc1d649 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -123,6 +123,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
 
     filterColumn.getColumnPath();
 
+    if (value == null) {
+      // the dictionary contains only non-null values so isn't helpful. this
+      // could check the column stats, but the StatisticsFilter is responsible
+      return BLOCK_MIGHT_MATCH;
+    }
+
     try {
       Set<T> dictSet = expandDictionary(meta);
       if (dictSet != null && !dictSet.contains(value)) {
@@ -150,6 +156,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
 
     filterColumn.getColumnPath();
 
+    if (value == null) {
+      // the dictionary contains only non-null values so isn't helpful. this
+      // could check the column stats, but the StatisticsFilter is responsible
+      return BLOCK_MIGHT_MATCH;
+    }
+
     try {
       Set<T> dictSet = expandDictionary(meta);
       if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value)) {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 754da68..35b944d 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -193,6 +193,9 @@ public class DictionaryFilterTest {
 
     assertTrue("Should drop block for upper case letters",
         canDrop(eq(b, Binary.fromString("A")), ccmd, dictionaries));
+
+    assertFalse("Should not drop block for null",
+        canDrop(eq(b, null), ccmd, dictionaries));
   }
 
   @Test
@@ -211,6 +214,9 @@ public class DictionaryFilterTest {
 
     assertFalse("Should not drop block with a known value",
         canDrop(notEq(b, Binary.fromString("B")), ccmd, dictionaries));
+
+    assertFalse("Should not drop block for null",
+        canDrop(notEq(b, null), ccmd, dictionaries));
   }
 
   @Test


Mime
View raw message