parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [parquet-mr] branch master updated: PARQUET-1341: Fix null count stats in unsigned-sort columns. (#499)
Date Tue, 03 Jul 2018 22:24:56 GMT
This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new d320a45  PARQUET-1341: Fix null count stats in unsigned-sort columns. (#499)
d320a45 is described below

commit d320a457a9de67be25a03f79e1695d549a0145f3
Author: Ryan Blue <rdblue@users.noreply.github.com>
AuthorDate: Tue Jul 3 15:24:53 2018 -0700

    PARQUET-1341: Fix null count stats in unsigned-sort columns. (#499)
    
    * Fix null count stats in unsigned-sort columns.
    * Fix test case for old min/max values and unsigned ordering.
---
 .../parquet/format/converter/ParquetMetadataConverter.java     | 10 ++++------
 .../parquet/format/converter/TestParquetMetadataConverter.java |  4 +++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index ff3d6cb..d222505 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -621,9 +621,6 @@ public class ParquetMetadataConverter {
           statsBuilder.withMin(min);
           statsBuilder.withMax(max);
         }
-        if (formatStats.isSetNull_count()) {
-          statsBuilder.withNumNulls(formatStats.null_count);
-        }
       } else {
         boolean isSet = formatStats.isSetMax() && formatStats.isSetMin();
         boolean maxEqualsMin = isSet ? Arrays.equals(formatStats.getMin(), formatStats.getMax()) : false;
@@ -639,11 +636,12 @@ public class ParquetMetadataConverter {
             statsBuilder.withMin(formatStats.min.array());
             statsBuilder.withMax(formatStats.max.array());
           }
-          if (formatStats.isSetNull_count()) {
-            statsBuilder.withNumNulls(formatStats.null_count);
-          }
         }
       }
+
+      if (formatStats.isSetNull_count()) {
+        statsBuilder.withNumNulls(formatStats.null_count);
+      }
     }
     return statsBuilder.build();
   }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index b3eebd6..1474525 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -617,7 +617,9 @@ public class TestParquetMetadataConverter {
         StatsHelper.V1.toParquetStatistics(stats),
         binaryType);
 
-    Assert.assertTrue("Stats should be empty: " + convertedStats, convertedStats.isEmpty());
+    Assert.assertFalse("Stats should not include min/max: " + convertedStats, convertedStats.hasNonNullValue());
+    Assert.assertTrue("Stats should have null count: " + convertedStats, convertedStats.isNumNullsSet());
+    Assert.assertEquals("Stats should have 3 nulls: " + convertedStats, 3L, convertedStats.getNumNulls());
   }
 
   @Test


Mime
View raw message