PARQUET-511: Integer overflow when counting values in column.
This commit fixes an issue that occurs when the number of values in a column exceeds the
maximum value of a Java int (Integer.MAX_VALUE). No exception is thrown directly, but the
definition level is set incorrectly, causing a null value to be returned during read.
Author: Michal Gorecki <goreckim@amazon.com>
Closes #321 from goreckm/int-overflow and squashes the following commits:
d224815 [Michal Gorecki] enhancing exception message
7334be2 [Michal Gorecki] PARQUET-511: Integer overflow when counting values in column.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/aced0eb3
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/aced0eb3
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/aced0eb3
Branch: refs/heads/parquet-1.8.x
Commit: aced0eb3770b82d9bde95ac499f902bed372fd39
Parents: 06567fa
Author: Michal Gorecki <goreckim@amazon.com>
Authored: Mon Aug 1 14:38:07 2016 -0700
Committer: Ryan Blue <blue@apache.org>
Committed: Mon Jan 9 16:54:54 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/parquet/column/impl/ColumnReaderImpl.java | 6 +++---
.../org/apache/parquet/hadoop/ColumnChunkPageReadStore.java | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
index c53977f..6aafb78 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
@@ -149,7 +149,7 @@ public class ColumnReaderImpl implements ColumnReader {
private int dictionaryId;
private long endOfPageValueCount;
- private int readValues = 0;
+ private long readValues = 0;
private int pageValueCount = 0;
private final PrimitiveConverter converter;
@@ -351,8 +351,8 @@ public class ColumnReaderImpl implements ColumnReader {
this.dictionary = null;
}
this.totalValueCount = pageReader.getTotalValueCount();
- if (totalValueCount == 0) {
- throw new ParquetDecodingException("totalValueCount == 0");
+ if (totalValueCount <= 0) {
+ throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
}
consume();
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
index ce10e64..2e8f84a 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
@@ -64,7 +64,7 @@ class ColumnChunkPageReadStore implements PageReadStore, DictionaryPageReadStore
this.decompressor = decompressor;
this.compressedPages = new LinkedList<DataPage>(compressedPages);
this.compressedDictionaryPage = compressedDictionaryPage;
- int count = 0;
+ long count = 0;
for (DataPage p : compressedPages) {
count += p.getValueCount();
}
|