parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject parquet-cpp git commit: PARQUET-780: WriterBatch API does not properly handle NULL values for byte array types
Date Sat, 26 Nov 2016 20:29:20 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 086d5cc73 -> 47a94590d


PARQUET-780: WriterBatch API does not properly handle NULL values for byte array types

CC: @xhochy @trink

Author: Deepak Majeti <deepak.majeti@hpe.com>

Closes #195 from majetideepak/PARQUET-780 and squashes the following commits:

12e2197 [Deepak Majeti] use nullptr
08f287f [Deepak Majeti] Add NULL ptr checks


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/47a94590
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/47a94590
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/47a94590

Branch: refs/heads/master
Commit: 47a94590d00e50a96d83ba3e8970af1271803218
Parents: 086d5cc
Author: Deepak Majeti <deepak.majeti@hpe.com>
Authored: Sat Nov 26 15:29:12 2016 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Sat Nov 26 15:29:12 2016 -0500

----------------------------------------------------------------------
 src/parquet/column/writer.cc                | 3 +++
 src/parquet/encodings/dictionary-encoding.h | 4 ++++
 src/parquet/encodings/plain-encoding.h      | 4 ++++
 3 files changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/47a94590/src/parquet/column/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc
index 92a5e09..6112efe 100644
--- a/src/parquet/column/writer.cc
+++ b/src/parquet/column/writer.cc
@@ -336,6 +336,9 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
     throw ParquetException("More rows were written in the column chunk than expected");
   }
 
+  // PARQUET-780
+  if (values_to_write > 0) { DCHECK(nullptr != values) << "Values ptr cannot be NULL"; }
+
   WriteValues(values_to_write, values);
 
   if (page_statistics_ != nullptr) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/47a94590/src/parquet/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h
index 465a0e4..7823307 100644
--- a/src/parquet/encodings/dictionary-encoding.h
+++ b/src/parquet/encodings/dictionary-encoding.h
@@ -285,11 +285,13 @@ inline int DictEncoder<DType>::Hash(const typename DType::c_type& value) const {
 
 template <>
 inline int DictEncoder<ByteArrayType>::Hash(const ByteArray& value) const {
+  if (value.len > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
   return HashUtil::Hash(value.ptr, value.len, 0);
 }
 
 template <>
 inline int DictEncoder<FLBAType>::Hash(const FixedLenByteArray& value) const {
+  if (type_length_ > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
   return HashUtil::Hash(value.ptr, type_length_, 0);
 }
 
@@ -415,6 +417,7 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
   for (const ByteArray& v : uniques_) {
     memcpy(buffer, reinterpret_cast<const void*>(&v.len), sizeof(uint32_t));
     buffer += sizeof(uint32_t);
+    if (v.len > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
     memcpy(buffer, v.ptr, v.len);
     buffer += v.len;
   }
@@ -423,6 +426,7 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
 template <>
 inline void DictEncoder<FLBAType>::WriteDict(uint8_t* buffer) {
   for (const FixedLenByteArray& v : uniques_) {
+    if (type_length_ > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
     memcpy(buffer, v.ptr, type_length_);
     buffer += type_length_;
   }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/47a94590/src/parquet/encodings/plain-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h
index 6f0d503..a3d7b69 100644
--- a/src/parquet/encodings/plain-encoding.h
+++ b/src/parquet/encodings/plain-encoding.h
@@ -271,6 +271,7 @@ inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_value
   for (int i = 0; i < num_values; ++i) {
     // Write the result to the output stream
     values_sink_->Write(reinterpret_cast<const uint8_t*>(&src[i].len), sizeof(uint32_t));
+    if (src[i].len > 0) { DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; }
     values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), src[i].len);
   }
 }
@@ -279,6 +280,9 @@ template <>
 inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_values) {
   for (int i = 0; i < num_values; ++i) {
     // Write the result to the output stream
+    if (descr_->type_length() > 0) {
+      DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL";
+    }
     values_sink_->Write(
         reinterpret_cast<const uint8_t*>(src[i].ptr), descr_->type_length());
   }


Mime
View raw message