Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D638A18CBB for ; Sat, 30 Apr 2016 18:13:21 +0000 (UTC) Received: (qmail 36680 invoked by uid 500); 30 Apr 2016 18:13:21 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 36642 invoked by uid 500); 30 Apr 2016 18:13:21 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 36633 invoked by uid 99); 30 Apr 2016 18:13:21 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 30 Apr 2016 18:13:21 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 8D7CBDFC6F; Sat, 30 Apr 2016 18:13:21 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@parquet.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: parquet-cpp git commit: PARQUET-547: Refactor templates to all be based on DataType structs Date: Sat, 30 Apr 2016 18:13:21 +0000 (UTC) Repository: parquet-cpp Updated Branches: refs/heads/master 5fb7d20a9 -> dc0fc7d49 PARQUET-547: Refactor templates to all be based on DataType structs Author: Wes McKinney Closes #91 from wesm/PARQUET-547 and squashes the following commits: 97b8b9a [Wes McKinney] Refactor templates to all be based on DataType subclasses Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/dc0fc7d4 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/dc0fc7d4 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/dc0fc7d4 Branch: refs/heads/master Commit: dc0fc7d49ff2ffa8dcaba618a36f8524b9dbeba4 Parents: 5fb7d20 Author: Wes McKinney Authored: Sat Apr 30 11:13:34 2016 -0700 Committer: Wes McKinney Committed: Sat Apr 30 11:13:34 2016 -0700 ---------------------------------------------------------------------- example/decode_benchmark.cc | 4 +- src/parquet/column/reader.cc | 30 ++++++------- src/parquet/column/reader.h | 30 ++++++------- src/parquet/column/scanner-test.cc | 12 ++--- src/parquet/column/scanner.h | 46 ++++++++++---------- src/parquet/column/test-util.h | 4 +- src/parquet/column/writer.cc | 23 +++++----- src/parquet/column/writer.h | 32 +++++++------- src/parquet/encodings/decoder.h | 6 +-- src/parquet/encodings/delta-bit-pack-encoding.h | 12 ++--- .../encodings/delta-byte-array-encoding.h | 8 ++-- .../delta-length-byte-array-encoding.h | 8 ++-- src/parquet/encodings/dictionary-encoding.h | 27 ++++++------ src/parquet/encodings/encoder.h | 4 +- src/parquet/encodings/encoding-test.cc | 16 +++---- src/parquet/encodings/plain-encoding.h | 40 ++++++++--------- src/parquet/types.h | 4 +- 17 files changed, 151 insertions(+), 155 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/example/decode_benchmark.cc ---------------------------------------------------------------------- diff --git a/example/decode_benchmark.cc b/example/decode_benchmark.cc index 3285a71..81eee02 100644 --- a/example/decode_benchmark.cc +++ b/example/decode_benchmark.cc @@ -202,7 +202,7 @@ class DeltaByteArrayEncoder { uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_size) { uint64_t result = 0; - PlainDecoder decoder(nullptr); + PlainDecoder decoder(nullptr); decoder.SetData(num_values, data, num_values * sizeof(int64_t)); int64_t values[batch_size]; for (int i = 0; i < num_values;) { @@ -225,7 +225,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const vector& value } else { mini_block_size = 32; } - DeltaBitPackDecoder decoder(nullptr); + DeltaBitPackDecoder decoder(nullptr); DeltaBitPackEncoder encoder(mini_block_size); for (size_t i = 0; i < values.size(); ++i) { encoder.Add(values[i]); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc index f379348..4598dfb 100644 --- a/src/parquet/column/reader.cc +++ b/src/parquet/column/reader.cc @@ -36,8 +36,8 @@ ColumnReader::ColumnReader(const ColumnDescriptor* descr, num_decoded_values_(0), allocator_(allocator) {} -template -void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { +template +void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { int encoding = static_cast(page->encoding()); if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { @@ -51,7 +51,7 @@ void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { - PlainDecoder dictionary(descr_); + PlainDecoder dictionary(descr_); dictionary.SetData(page->num_values(), page->data(), page->size()); // The dictionary is fully decoded during DictionaryDecoder::Init, so the @@ -60,7 +60,7 @@ void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { // TODO(wesm): investigate whether this all-or-nothing decoding of the // dictionary makes sense and whether performance can be improved - auto decoder = std::make_shared >(descr_, allocator_); + auto decoder = std::make_shared >(descr_, allocator_); decoder->SetDict(&dictionary); decoders_[encoding] = decoder; } else { @@ -77,8 +77,8 @@ static bool IsDictionaryIndexEncoding(const Encoding::type& e) { e == Encoding::PLAIN_DICTIONARY; } -template -bool TypedColumnReader::ReadNewPage() { +template +bool TypedColumnReader::ReadNewPage() { // Loop until we find the next data page. const uint8_t* buffer; @@ -147,7 +147,7 @@ bool TypedColumnReader::ReadNewPage() { } else { switch (encoding) { case Encoding::PLAIN: { - std::shared_ptr decoder(new PlainDecoder(descr_)); + std::shared_ptr decoder(new PlainDecoder(descr_)); decoders_[static_cast(encoding)] = decoder; current_decoder_ = decoder.get(); break; @@ -227,13 +227,13 @@ std::shared_ptr ColumnReader::Make( // ---------------------------------------------------------------------- // Instantiate templated classes -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; +template class TypedColumnReader; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h index 0739e7e..7704c52 100644 --- a/src/parquet/column/reader.h +++ b/src/parquet/column/reader.h @@ -102,10 +102,10 @@ class ColumnReader { }; // API to read values from a single column. This is the main client facing API. -template +template class TypedColumnReader : public ColumnReader { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; TypedColumnReader(const ColumnDescriptor* schema, std::unique_ptr pager, @@ -131,7 +131,7 @@ class TypedColumnReader : public ColumnReader { T* values, int64_t* values_read); private: - typedef Decoder DecoderType; + typedef Decoder DecoderType; // Advance to the next data page virtual bool ReadNewPage(); @@ -153,14 +153,14 @@ class TypedColumnReader : public ColumnReader { }; -template -inline int64_t TypedColumnReader::ReadValues(int64_t batch_size, T* out) { +template +inline int64_t TypedColumnReader::ReadValues(int64_t batch_size, T* out) { int64_t num_decoded = current_decoder_->Decode(out, batch_size); return num_decoded; } -template -inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_levels, +template +inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { // HasNext invokes ReadNewPage if (!HasNext()) { @@ -208,14 +208,14 @@ inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_l } -typedef TypedColumnReader BoolReader; -typedef TypedColumnReader Int32Reader; -typedef TypedColumnReader Int64Reader; -typedef TypedColumnReader Int96Reader; -typedef TypedColumnReader FloatReader; -typedef TypedColumnReader DoubleReader; -typedef TypedColumnReader ByteArrayReader; -typedef TypedColumnReader FixedLenByteArrayReader; +typedef TypedColumnReader BoolReader; +typedef TypedColumnReader Int32Reader; +typedef TypedColumnReader Int64Reader; +typedef TypedColumnReader Int96Reader; +typedef TypedColumnReader FloatReader; +typedef TypedColumnReader DoubleReader; +typedef TypedColumnReader ByteArrayReader; +typedef TypedColumnReader FixedLenByteArrayReader; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/scanner-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner-test.cc b/src/parquet/column/scanner-test.cc index 983f94e..78bc3c9 100644 --- a/src/parquet/column/scanner-test.cc +++ b/src/parquet/column/scanner-test.cc @@ -99,8 +99,8 @@ class TestFlatScanner : public ::testing::Test { } void CheckResults(int batch_size, const ColumnDescriptor *d) { - TypedScanner* scanner = - reinterpret_cast* >(scanner_.get()); + TypedScanner* scanner = + reinterpret_cast* >(scanner_.get()); T val; bool is_null = false; int16_t def_level; @@ -243,8 +243,8 @@ TEST_F(TestFlatFLBAScanner, TestDescriptorAPI) { data_buffer_, pages_); num_levels_ = 1 * 100; InitScanner(&d); - TypedScanner* scanner = - reinterpret_cast* >(scanner_.get()); + TypedScanner* scanner = + reinterpret_cast* >(scanner_.get()); ASSERT_EQ(10, scanner->descr()->type_precision()); ASSERT_EQ(2, scanner->descr()->type_scale()); ASSERT_EQ(FLBA_LENGTH, scanner->descr()->type_length()); @@ -258,8 +258,8 @@ TEST_F(TestFlatFLBAScanner, TestFLBAPrinterNext) { data_buffer_, pages_); num_levels_ = 1 * 100; InitScanner(&d); - TypedScanner* scanner = - reinterpret_cast* >(scanner_.get()); + TypedScanner* scanner = + reinterpret_cast* >(scanner_.get()); scanner->SetBatchSize(batch_size); std::stringstream ss_fail; for (int i = 0; i < num_levels_; i++) { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/scanner.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h index 5d04a0e..d52838e 100644 --- a/src/parquet/column/scanner.h +++ b/src/parquet/column/scanner.h @@ -91,17 +91,17 @@ class Scanner { }; -template +template class TypedScanner : public Scanner { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; explicit TypedScanner(std::shared_ptr reader, int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE, MemoryAllocator* allocator = default_allocator()) : Scanner(reader, batch_size, allocator) { - typed_reader_ = static_cast*>(reader.get()); - int value_byte_size = type_traits::value_byte_size; + typed_reader_ = static_cast*>(reader.get()); + int value_byte_size = type_traits::value_byte_size; value_buffer_.Resize(batch_size_ * value_byte_size); values_ = reinterpret_cast(&value_buffer_[0]); } @@ -183,7 +183,7 @@ class TypedScanner : public Scanner { } if (is_null) { - std::string null_fmt = format_fwf(width); + std::string null_fmt = format_fwf(width); snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL"); } else { FormatValue(&val, buffer, sizeof(buffer), width); @@ -193,7 +193,7 @@ class TypedScanner : public Scanner { private: // The ownership of this object is expressed through the reader_ variable in the base - TypedColumnReader* typed_reader_; + TypedColumnReader* typed_reader_; inline void FormatValue(void* val, char* buffer, int bufsize, int width); @@ -201,47 +201,47 @@ class TypedScanner : public Scanner { }; -template -inline void TypedScanner::FormatValue(void* val, char* buffer, +template +inline void TypedScanner::FormatValue(void* val, char* buffer, int bufsize, int width) { - std::string fmt = format_fwf(width); + std::string fmt = format_fwf(width); snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast(val)); } template <> -inline void TypedScanner::FormatValue( +inline void TypedScanner::FormatValue( void* val, char* buffer, int bufsize, int width) { - std::string fmt = format_fwf(width); + std::string fmt = format_fwf(width); std::string result = Int96ToString(*reinterpret_cast(val)); snprintf(buffer, bufsize, fmt.c_str(), result.c_str()); } template <> -inline void TypedScanner::FormatValue( +inline void TypedScanner::FormatValue( void* val, char* buffer, int bufsize, int width) { - std::string fmt = format_fwf(width); + std::string fmt = format_fwf(width); std::string result = ByteArrayToString(*reinterpret_cast(val)); snprintf(buffer, bufsize, fmt.c_str(), result.c_str()); } template <> -inline void TypedScanner::FormatValue( +inline void TypedScanner::FormatValue( void* val, char* buffer, int bufsize, int width) { - std::string fmt = format_fwf(width); + std::string fmt = format_fwf(width); std::string result = FixedLenByteArrayToString( *reinterpret_cast(val), descr()->type_length()); snprintf(buffer, bufsize, fmt.c_str(), result.c_str()); } -typedef TypedScanner BoolScanner; -typedef TypedScanner Int32Scanner; -typedef TypedScanner Int64Scanner; -typedef TypedScanner Int96Scanner; -typedef TypedScanner FloatScanner; -typedef TypedScanner DoubleScanner; -typedef TypedScanner ByteArrayScanner; -typedef TypedScanner FixedLenByteArrayScanner; +typedef TypedScanner BoolScanner; +typedef TypedScanner Int32Scanner; +typedef TypedScanner Int64Scanner; +typedef TypedScanner Int96Scanner; +typedef TypedScanner FloatScanner; +typedef TypedScanner DoubleScanner; +typedef TypedScanner ByteArrayScanner; +typedef TypedScanner FixedLenByteArrayScanner; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/test-util.h b/src/parquet/column/test-util.h index 2801f3c..95b1981 100644 --- a/src/parquet/column/test-util.h +++ b/src/parquet/column/test-util.h @@ -128,7 +128,7 @@ class DataPageBuilder { void AppendValues(const ColumnDescriptor *d, const vector& values, Encoding::type encoding = Encoding::PLAIN) { - PlainEncoder encoder(d); + PlainEncoder encoder(d); encoder.Encode(&values[0], values.size(), sink_); num_values_ = std::max(static_cast(values.size()), num_values_); @@ -195,7 +195,7 @@ void DataPageBuilder::AppendValues(const ColumnDescriptor *d, if (encoding != Encoding::PLAIN) { ParquetException::NYI("only plain encoding currently implemented"); } - PlainEncoder encoder(d); + PlainEncoder encoder(d); encoder.Encode(values, values.size(), sink_); num_values_ = std::max(static_cast(values.size()), num_values_); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc index f851316..4dcb672 100644 --- a/src/parquet/column/writer.cc +++ b/src/parquet/column/writer.cc @@ -117,14 +117,14 @@ int64_t ColumnWriter::Close() { // ---------------------------------------------------------------------- // TypedColumnWriter -template -TypedColumnWriter::TypedColumnWriter(const ColumnDescriptor* schema, +template +TypedColumnWriter::TypedColumnWriter(const ColumnDescriptor* schema, std::unique_ptr pager, int64_t expected_rows, MemoryAllocator* allocator) : ColumnWriter(schema, std::move(pager), expected_rows, allocator) { // TODO(PARQUET-590) Get decoder type from WriterProperties current_encoder_ = std::unique_ptr( - new PlainEncoder(schema, allocator)); + new PlainEncoder(schema, allocator)); } // ---------------------------------------------------------------------- @@ -170,14 +170,13 @@ std::shared_ptr ColumnWriter::Make( // ---------------------------------------------------------------------- // Instantiate templated classes -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; - +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; +template class TypedColumnWriter; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/column/writer.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/writer.h b/src/parquet/column/writer.h index b9ea265..7ccfe73 100644 --- a/src/parquet/column/writer.h +++ b/src/parquet/column/writer.h @@ -101,10 +101,10 @@ class ColumnWriter { }; // API to write values to a single column. This is the main client facing API. -template +template class TypedColumnWriter : public ColumnWriter { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; TypedColumnWriter(const ColumnDescriptor* schema, std::unique_ptr pager, int64_t expected_rows, @@ -116,7 +116,7 @@ class TypedColumnWriter : public ColumnWriter { T* values); private: - typedef Encoder EncoderType; + typedef Encoder EncoderType; // Write values to a temporary buffer before they are encoded into pages void WriteValues(int64_t num_values, T* values); @@ -135,8 +135,8 @@ class TypedColumnWriter : public ColumnWriter { // See also: parquet-column/../column/impl/ColumnWriteStoreV2.java:sizeCheck const int64_t PAGE_VALUE_COUNT = 1000; -template -inline void TypedColumnWriter::WriteBatch(int64_t num_values, int16_t* def_levels, +template +inline void TypedColumnWriter::WriteBatch(int64_t num_values, int16_t* def_levels, int16_t* rep_levels, T* values) { int64_t values_to_write = 0; @@ -185,22 +185,20 @@ inline void TypedColumnWriter::WriteBatch(int64_t num_values, int16_t* def } } -template -void TypedColumnWriter::WriteValues(int64_t num_values, T* values) { +template +void TypedColumnWriter::WriteValues(int64_t num_values, T* values) { current_encoder_->Encode(values, num_values, values_sink_.get()); } - -typedef TypedColumnWriter BoolWriter; -typedef TypedColumnWriter Int32Writer; -typedef TypedColumnWriter Int64Writer; -typedef TypedColumnWriter Int96Writer; -typedef TypedColumnWriter FloatWriter; -typedef TypedColumnWriter DoubleWriter; -typedef TypedColumnWriter ByteArrayWriter; -typedef TypedColumnWriter FixedLenByteArrayWriter; +typedef TypedColumnWriter BoolWriter; +typedef TypedColumnWriter Int32Writer; +typedef TypedColumnWriter Int64Writer; +typedef TypedColumnWriter Int96Writer; +typedef TypedColumnWriter FloatWriter; +typedef TypedColumnWriter DoubleWriter; +typedef TypedColumnWriter ByteArrayWriter; +typedef TypedColumnWriter FixedLenByteArrayWriter; } // namespace parquet #endif // PARQUET_COLUMN_READER_H - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/decoder.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/decoder.h b/src/parquet/encodings/decoder.h index f1928ee..36af107 100644 --- a/src/parquet/encodings/decoder.h +++ b/src/parquet/encodings/decoder.h @@ -28,11 +28,11 @@ namespace parquet { class ColumnDescriptor; -// The Decoder template is parameterized on parquet::Type::type -template +// The Decoder template is parameterized on parquet::DataType subclasses +template class Decoder { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; virtual ~Decoder() {} http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/delta-bit-pack-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/delta-bit-pack-encoding.h b/src/parquet/encodings/delta-bit-pack-encoding.h index a91dcc1..b0a16a7 100644 --- a/src/parquet/encodings/delta-bit-pack-encoding.h +++ b/src/parquet/encodings/delta-bit-pack-encoding.h @@ -28,16 +28,16 @@ namespace parquet { -template -class DeltaBitPackDecoder : public Decoder { +template +class DeltaBitPackDecoder : public Decoder { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; explicit DeltaBitPackDecoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) - : Decoder(descr, Encoding::DELTA_BINARY_PACKED), + : Decoder(descr, Encoding::DELTA_BINARY_PACKED), delta_bit_widths_(0, allocator) { - if (TYPE != Type::INT32 && TYPE != Type::INT64) { + if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) { throw ParquetException("Delta bit pack encoding should only be for integer data."); } } @@ -54,7 +54,7 @@ class DeltaBitPackDecoder : public Decoder { } private: - using Decoder::num_values_; + using Decoder::num_values_; void InitBlock() { int32_t block_size; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/delta-byte-array-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/delta-byte-array-encoding.h b/src/parquet/encodings/delta-byte-array-encoding.h index d6e7b05..34867e2 100644 --- a/src/parquet/encodings/delta-byte-array-encoding.h +++ b/src/parquet/encodings/delta-byte-array-encoding.h @@ -26,11 +26,11 @@ namespace parquet { -class DeltaByteArrayDecoder : public Decoder { +class DeltaByteArrayDecoder : public Decoder { public: explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) - : Decoder(descr, Encoding::DELTA_BYTE_ARRAY), + : Decoder(descr, Encoding::DELTA_BYTE_ARRAY), prefix_len_decoder_(nullptr, allocator), suffix_decoder_(nullptr, allocator) { } @@ -70,9 +70,9 @@ class DeltaByteArrayDecoder : public Decoder { } private: - using Decoder::num_values_; + using Decoder::num_values_; - DeltaBitPackDecoder prefix_len_decoder_; + DeltaBitPackDecoder prefix_len_decoder_; DeltaLengthByteArrayDecoder suffix_decoder_; ByteArray last_value_; }; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/delta-length-byte-array-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/delta-length-byte-array-encoding.h b/src/parquet/encodings/delta-length-byte-array-encoding.h index ee4c802..7a19aa3 100644 --- a/src/parquet/encodings/delta-length-byte-array-encoding.h +++ b/src/parquet/encodings/delta-length-byte-array-encoding.h @@ -27,11 +27,11 @@ namespace parquet { -class DeltaLengthByteArrayDecoder : public Decoder { +class DeltaLengthByteArrayDecoder : public Decoder { public: explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) : - Decoder(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY), + Decoder(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY), len_decoder_(nullptr, allocator) { } @@ -60,8 +60,8 @@ class DeltaLengthByteArrayDecoder : public Decoder { } private: - using Decoder::num_values_; - DeltaBitPackDecoder len_decoder_; + using Decoder::num_values_; + DeltaBitPackDecoder len_decoder_; const uint8_t* data_; int len_; }; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/dictionary-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h index 14f574e..e26ba2d 100644 --- a/src/parquet/encodings/dictionary-encoding.h +++ b/src/parquet/encodings/dictionary-encoding.h @@ -36,21 +36,21 @@ namespace parquet { -template -class DictionaryDecoder : public Decoder { +template +class DictionaryDecoder : public Decoder { public: - typedef typename type_traits::value_type T; + typedef typename Type::c_type T; // Initializes the dictionary with values from 'dictionary'. The data in // dictionary is not guaranteed to persist in memory after this call so the // dictionary decoder needs to copy the data out if necessary. explicit DictionaryDecoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()): - Decoder(descr, Encoding::RLE_DICTIONARY), dictionary_(0, allocator), + Decoder(descr, Encoding::RLE_DICTIONARY), dictionary_(0, allocator), byte_array_data_(0, allocator) {} // Perform type-specific initiatialization - void SetDict(Decoder* dictionary); + void SetDict(Decoder* dictionary); virtual void SetData(int num_values, const uint8_t* data, int len) { num_values_ = num_values; @@ -70,7 +70,7 @@ class DictionaryDecoder : public Decoder { } private: - using Decoder::num_values_; + using Decoder::num_values_; int index() { int idx = 0; @@ -89,22 +89,22 @@ class DictionaryDecoder : public Decoder { RleDecoder idx_decoder_; }; -template -inline void DictionaryDecoder::SetDict(Decoder* dictionary) { +template +inline void DictionaryDecoder::SetDict(Decoder* dictionary) { int num_dictionary_values = dictionary->values_left(); dictionary_.Resize(num_dictionary_values); dictionary->Decode(&dictionary_[0], num_dictionary_values); } template <> -inline void DictionaryDecoder::SetDict( - Decoder* dictionary) { +inline void DictionaryDecoder::SetDict( + Decoder* dictionary) { ParquetException::NYI("Dictionary encoding is not implemented for boolean values"); } template <> -inline void DictionaryDecoder::SetDict( - Decoder* dictionary) { +inline void DictionaryDecoder::SetDict( + Decoder* dictionary) { int num_dictionary_values = dictionary->values_left(); dictionary_.Resize(num_dictionary_values); dictionary->Decode(&dictionary_[0], num_dictionary_values); @@ -123,8 +123,7 @@ inline void DictionaryDecoder::SetDict( } template <> -inline void DictionaryDecoder::SetDict( - Decoder* dictionary) { +inline void DictionaryDecoder::SetDict(Decoder* dictionary) { int num_dictionary_values = dictionary->values_left(); dictionary_.Resize(num_dictionary_values); dictionary->Decode(&dictionary_[0], num_dictionary_values); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/encoder.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoder.h b/src/parquet/encodings/encoder.h index d7af2f9..0d69111 100644 --- a/src/parquet/encodings/encoder.h +++ b/src/parquet/encodings/encoder.h @@ -32,10 +32,10 @@ class OutputStream; // dictionary encoding) we use a class instance to maintain any state. // // TODO(wesm): Encode interface API is temporary -template +template class Encoder { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; virtual ~Encoder() {} http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoding-test.cc b/src/parquet/encodings/encoding-test.cc index 18ff5cc..d55de76 100644 --- a/src/parquet/encodings/encoding-test.cc +++ b/src/parquet/encodings/encoding-test.cc @@ -47,8 +47,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) { // seed the prng so failure is deterministic vector draws = flip_coins_seed(nvalues, 0.5, 0); - PlainEncoder encoder(nullptr); - PlainDecoder decoder(nullptr); + PlainEncoder encoder(nullptr); + PlainDecoder decoder(nullptr); InMemoryOutputStream dst; encoder.Encode(draws, nvalues, &dst); @@ -218,8 +218,8 @@ class TestPlainEncoding : public TestEncodingBase { static constexpr int TYPE = Type::type_num; virtual void CheckRoundtrip() { - PlainEncoder encoder(descr_.get()); - PlainDecoder decoder(descr_.get()); + PlainEncoder encoder(descr_.get()); + PlainDecoder decoder(descr_.get()); InMemoryOutputStream dst; encoder.Encode(draws_, num_values_, &dst); @@ -274,11 +274,11 @@ class TestDictionaryEncoding : public TestEncodingBase { indices->size()); indices->Resize(actual_bytes); - PlainDecoder dict_decoder(descr_.get()); + PlainDecoder dict_decoder(descr_.get()); dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(), dict_buffer_->size()); - DictionaryDecoder decoder(descr_.get()); + DictionaryDecoder decoder(descr_.get()); decoder.SetDict(&dict_decoder); decoder.SetData(num_values_, indices->data(), indices->size()); @@ -303,8 +303,8 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) { } TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) { - PlainDecoder dict_decoder(nullptr); - DictionaryDecoder decoder(nullptr); + PlainDecoder dict_decoder(nullptr); + DictionaryDecoder decoder(nullptr); ASSERT_THROW(decoder.SetDict(&dict_decoder), ParquetException); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/encodings/plain-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h index eee4463..56243c8 100644 --- a/src/parquet/encodings/plain-encoding.h +++ b/src/parquet/encodings/plain-encoding.h @@ -33,14 +33,14 @@ namespace parquet { // ---------------------------------------------------------------------- // Encoding::PLAIN decoder implementation -template -class PlainDecoder : public Decoder { +template +class PlainDecoder : public Decoder { public: - typedef typename type_traits::value_type T; - using Decoder::num_values_; + typedef typename DType::c_type T; + using Decoder::num_values_; explicit PlainDecoder(const ColumnDescriptor* descr) : - Decoder(descr, Encoding::PLAIN), + Decoder(descr, Encoding::PLAIN), data_(NULL), len_(0) { if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) { type_length_ = descr_->type_length(); @@ -58,7 +58,7 @@ class PlainDecoder : public Decoder { virtual int Decode(T* buffer, int max_values); private: - using Decoder::descr_; + using Decoder::descr_; const uint8_t* data_; int len_; int type_length_; @@ -112,8 +112,8 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size return bytes_to_decode; } -template -inline int PlainDecoder::Decode(T* buffer, int max_values) { +template +inline int PlainDecoder::Decode(T* buffer, int max_values) { max_values = std::min(max_values, num_values_); int bytes_consumed = DecodePlain(data_, len_, max_values, type_length_, buffer); @@ -124,10 +124,10 @@ inline int PlainDecoder::Decode(T* buffer, int max_values) { } template <> -class PlainDecoder : public Decoder { +class PlainDecoder : public Decoder { public: explicit PlainDecoder(const ColumnDescriptor* descr) : - Decoder(descr, Encoding::PLAIN) {} + Decoder(descr, Encoding::PLAIN) {} virtual void SetData(int num_values, const uint8_t* data, int len) { num_values_ = num_values; @@ -168,24 +168,24 @@ class PlainDecoder : public Decoder { // ---------------------------------------------------------------------- // Encoding::PLAIN encoder implementation -template -class PlainEncoder : public Encoder { +template +class PlainEncoder : public Encoder { public: - typedef typename type_traits::value_type T; + typedef typename DType::c_type T; explicit PlainEncoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) : - Encoder(descr, Encoding::PLAIN, allocator) {} + Encoder(descr, Encoding::PLAIN, allocator) {} void Encode(const T* src, int num_values, OutputStream* dst) override; }; template <> -class PlainEncoder : public Encoder { +class PlainEncoder : public Encoder { public: explicit PlainEncoder(const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) : - Encoder(descr, Encoding::PLAIN, allocator) {} + Encoder(descr, Encoding::PLAIN, allocator) {} virtual void Encode(const bool* src, int num_values, OutputStream* dst) { int bytes_required = BitUtil::Ceil(num_values, 8); @@ -221,14 +221,14 @@ class PlainEncoder : public Encoder { } }; -template -inline void PlainEncoder::Encode(const T* buffer, int num_values, +template +inline void PlainEncoder::Encode(const T* buffer, int num_values, OutputStream* dst) { dst->Write(reinterpret_cast(buffer), num_values * sizeof(T)); } template <> -inline void PlainEncoder::Encode(const ByteArray* src, +inline void PlainEncoder::Encode(const ByteArray* src, int num_values, OutputStream* dst) { for (int i = 0; i < num_values; ++i) { // Write the result to the output stream @@ -238,7 +238,7 @@ inline void PlainEncoder::Encode(const ByteArray* src, } template <> -inline void PlainEncoder::Encode( +inline void PlainEncoder::Encode( const FixedLenByteArray* src, int num_values, OutputStream* dst) { for (int i = 0; i < num_values; ++i) { // Write the result to the output stream http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/dc0fc7d4/src/parquet/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/types.h b/src/parquet/types.h index 450d1ab..72017f9 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -277,10 +277,10 @@ typedef DataType DoubleType; typedef DataType ByteArrayType; typedef DataType FLBAType; -template +template inline std::string format_fwf(int width) { std::stringstream ss; - ss << "%-" << width << type_traits::printf_code; + ss << "%-" << width << type_traits::printf_code; return ss.str(); }