Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 2F9CA200C84 for ; Mon, 29 May 2017 21:56:09 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 2E719160BC6; Mon, 29 May 2017 19:56:09 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 28DF9160BCE for ; Mon, 29 May 2017 21:56:07 +0200 (CEST) Received: (qmail 96938 invoked by uid 500); 29 May 2017 19:56:06 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 96922 invoked by uid 99); 29 May 2017 19:56:05 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 29 May 2017 19:56:05 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4D733E01C3; Mon, 29 May 2017 19:56:05 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: wesm@apache.org To: commits@parquet.apache.org Date: Mon, 29 May 2017 19:56:06 -0000 Message-Id: <292749f7d24c4640b608f916e76610a2@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: =?utf-8?q?=5B2/2=5D_parquet-cpp_git_commit=3A_PARQUET-991=3A_Resol?= =?utf-8?q?ve_msvc_warnings=3B_Appveyor_treats_msvc_warnings_as_=E2=80=A6?= archived-at: Mon, 29 May 2017 19:56:09 -0000 PARQUET-991: Resolve msvc warnings; Appveyor treats msvc warnings as … …errors (/WX flag) Author: Max Risuhin Closes #340 from MaxRis/PARQUET-991 and squashes the following commits: 98a2544 [Max Risuhin] PARQUET-991: Resolve msvc warnings; Appveyor treats msvc warnings as errors (/WX flag) Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/fc5228af Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/fc5228af Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/fc5228af Branch: refs/heads/master Commit: fc5228af3eee2ec8176e404ecb34b7ba985d0e4d Parents: b36c9ac Author: Max Risuhin Authored: Mon May 29 15:55:59 2017 -0400 Committer: Wes McKinney Committed: Mon May 29 15:55:59 2017 -0400 ---------------------------------------------------------------------- CMakeLists.txt | 2 +- ci/msvc-build.bat | 9 +- examples/reader-writer.cc | 8 +- src/parquet/arrow/arrow-reader-writer-test.cc | 10 +- src/parquet/arrow/reader.cc | 138 +++++++++++---------- src/parquet/arrow/test-util.h | 8 +- src/parquet/arrow/writer.cc | 18 +-- src/parquet/column/column-reader-test.cc | 36 +++--- src/parquet/column/column-writer-test.cc | 13 +- src/parquet/column/levels-test.cc | 30 +++-- src/parquet/column/levels.cc | 8 +- src/parquet/column/page.h | 2 +- src/parquet/column/reader.cc | 23 ++-- src/parquet/column/reader.h | 40 +++--- src/parquet/column/scanner.h | 5 +- src/parquet/column/statistics-test.cc | 6 +- src/parquet/column/statistics.cc | 23 ++-- src/parquet/column/statistics.h | 16 +-- src/parquet/column/test-specialization.h | 10 +- src/parquet/column/test-util.h | 20 +-- src/parquet/column/writer.cc | 55 ++++---- src/parquet/column/writer.h | 22 ++-- src/parquet/compression-test.cc | 10 +- src/parquet/compression.cc | 8 +- src/parquet/encoding-internal.h | 22 ++-- src/parquet/encoding-test.cc | 16 +-- src/parquet/encoding.h | 2 +- src/parquet/file/file-deserialize-test.cc | 6 +- src/parquet/file/file-metadata-test.cc | 4 +- src/parquet/file/metadata.cc | 14 ++- src/parquet/file/reader-internal.cc | 2 +- src/parquet/file/writer-internal.cc | 12 +- src/parquet/reader-test.cc | 2 +- src/parquet/schema-test.cc | 2 +- src/parquet/schema.cc | 4 +- src/parquet/schema.h | 4 +- src/parquet/types-test.cc | 4 +- src/parquet/util/bit-stream-utils.h | 8 +- src/parquet/util/bit-stream-utils.inline.h | 36 ++++-- src/parquet/util/bit-util.h | 2 +- src/parquet/util/comparison-test.cc | 4 +- src/parquet/util/cpu-info.cc | 4 +- src/parquet/util/hash-util.h | 2 +- src/parquet/util/memory.cc | 2 +- src/parquet/util/memory.h | 4 +- src/parquet/util/rle-encoding.h | 28 +++-- src/parquet/util/rle-test.cc | 20 +-- src/parquet/util/visibility.h | 28 +++++ 48 files changed, 430 insertions(+), 322 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 02700c8..6c0156c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -367,7 +367,7 @@ include(ThirdpartyToolchain) # Thrift requires these definitions for some types that we use add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H) if (MSVC) - add_definitions(-DNOMINMAX) + add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS) else() add_definitions(-DHAVE_NETINET_IN_H -fPIC) endif() http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/ci/msvc-build.bat ---------------------------------------------------------------------- diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat index 04743e6..9c3f8c1 100644 --- a/ci/msvc-build.bat +++ b/ci/msvc-build.bat @@ -21,6 +21,11 @@ mkdir build cd build SET PARQUET_TEST_DATA=%APPVEYOR_BUILD_FOLDER%\data +set PARQUET_CXXFLAGS=/MP + +if NOT "%CONFIGURATION%" == "Debug" ( + set PARQUET_CXXFLAGS="%PARQUET_CXXFLAGS% /WX" +) if "%CONFIGURATION%" == "Toolchain" ( conda install -y boost-cpp=1.63 brotli=0.6.0 zlib=1.2.11 snappy=1.1.4 thrift-cpp=0.10.0 -c conda-forge @@ -30,7 +35,7 @@ if "%CONFIGURATION%" == "Toolchain" ( cmake -G "%GENERATOR%" ^ -DCMAKE_BUILD_TYPE=Release ^ -DPARQUET_BOOST_USE_SHARED=OFF ^ - -DPARQUET_CXXFLAGS="/MP" ^ + -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^ -DPARQUET_ZLIB_VENDORED=OFF ^ .. || exit /B @@ -42,7 +47,7 @@ if NOT "%CONFIGURATION%" == "Toolchain" ( cmake -G "%GENERATOR%" ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DPARQUET_BOOST_USE_SHARED=OFF ^ - -DPARQUET_CXXFLAGS="/MP" ^ + -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^ .. || exit /B cmake --build . --config %CONFIGURATION% || exit /B http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/examples/reader-writer.cc ---------------------------------------------------------------------- diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc index 9118c88..6f21f6c 100644 --- a/examples/reader-writer.cc +++ b/examples/reader-writer.cc @@ -30,7 +30,7 @@ * This example describes writing and reading Parquet Files in C++ and serves as a * reference to the API. * The file contains all the physical data types supported by Parquet. -**/ + **/ /* Parquet is a structured columnar file format * Parquet File = "Parquet data" + "Parquet Metadata" @@ -42,7 +42,7 @@ * complex (nested) type (internal nodes) * For specific details, please refer the format here: * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md -**/ + **/ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; constexpr int FIXED_LENGTH = 10; @@ -168,7 +168,7 @@ int main(int argc, char** argv) { parquet::FloatWriter* float_writer = static_cast(rg_writer->NextColumn()); for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { - float value = i * 1.1; + float value = i * 1.1f; float_writer->WriteBatch(1, nullptr, nullptr, &value); } @@ -367,7 +367,7 @@ int main(int argc, char** argv) { // There are no NULL values in the rows written assert(values_read == 1); // Verify the value written - float expected_value = i * 1.1; + float expected_value = i * 1.1f; assert(value == expected_value); i++; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/arrow-reader-writer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 3d156b5..b9c77f1 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. +#ifdef _MSC_VER +#pragma warning(push) +// Disable forcing value to bool warnings +#pragma warning(disable : 4800) +#endif + #include "gtest/gtest.h" #include @@ -1172,13 +1178,11 @@ TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) { std::unique_ptr arrow_reader; ASSERT_NO_THROW( - arrow_reader.reset(new FileReader(pool, - ParquetFileReader::OpenFile(path, false)))); + arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false)))); std::shared_ptr<::arrow::Table> table; ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table)); } - } // namespace arrow } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 826fe37..a531454 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -295,7 +295,7 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr* out) { batch_size += reader_->metadata()->RowGroup(j)->ColumnChunk(i)->num_values(); } - return flat_column_reader->NextBatch(batch_size, out); + return flat_column_reader->NextBatch(static_cast(batch_size), out); } Status FileReader::Impl::GetSchema( @@ -319,7 +319,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index, // TODO(wesm): Refactor to share more code with ReadTable auto ReadColumnFunc = [&indices, &row_group_index, &schema, &columns, &rg_metadata, - this](int i) { + this](int i) { int column_index = indices[i]; int64_t batch_size = rg_metadata->ColumnChunk(column_index)->num_values(); @@ -331,7 +331,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index, ColumnReader flat_column_reader(std::move(impl)); std::shared_ptr array; - RETURN_NOT_OK(flat_column_reader.NextBatch(batch_size, &array)); + RETURN_NOT_OK(flat_column_reader.NextBatch(static_cast(batch_size), &array)); columns[i] = std::make_shared(schema->field(i), array); return Status::OK(); }; @@ -380,7 +380,7 @@ Status FileReader::Impl::ReadTable(std::shared_ptr* table) { std::vector indices(reader_->metadata()->num_columns()); for (size_t i = 0; i < indices.size(); ++i) { - indices[i] = i; + indices[i] = static_cast(i); } return ReadTable(indices, table); } @@ -389,7 +389,7 @@ Status FileReader::Impl::ReadRowGroup(int i, std::shared_ptr
* table) { std::vector indices(reader_->metadata()->num_columns()); for (size_t i = 0; i < indices.size(); ++i) { - indices[i] = i; + indices[i] = static_cast(i); } return ReadRowGroup(i, indices, table); } @@ -479,8 +479,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch(TypedColumnReader* RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); ArrowCType* out_ptr = reinterpret_cast(data_buffer_ptr_); std::copy(values, values + values_read, out_ptr + valid_bits_idx_); @@ -489,19 +489,20 @@ Status ColumnReader::Impl::ReadNonNullableBatch(TypedColumnReader* return Status::OK(); } -#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ - template <> \ - Status ColumnReader::Impl::ReadNonNullableBatch( \ - TypedColumnReader * reader, int64_t values_to_read, \ - int64_t * levels_read) { \ - int64_t values_read; \ - CType* out_ptr = reinterpret_cast(data_buffer_ptr_); \ - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(values_to_read, nullptr, \ - nullptr, out_ptr + valid_bits_idx_, &values_read)); \ - \ - valid_bits_idx_ += values_read; \ - \ - return Status::OK(); \ +#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ + template <> \ + Status ColumnReader::Impl::ReadNonNullableBatch( \ + TypedColumnReader * reader, int64_t values_to_read, \ + int64_t * levels_read) { \ + int64_t values_read; \ + CType* out_ptr = reinterpret_cast(data_buffer_ptr_); \ + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( \ + static_cast(values_to_read), nullptr, nullptr, \ + out_ptr + valid_bits_idx_, &values_read)); \ + \ + valid_bits_idx_ += values_read; \ + \ + return Status::OK(); \ } NONNULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t) @@ -519,8 +520,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::TimestampType, Int96Typ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); int64_t* out_ptr = reinterpret_cast(data_buffer_ptr_) + valid_bits_idx_; for (int64_t i = 0; i < values_read; i++) { @@ -537,8 +538,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>( RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); int64_t* out_ptr = reinterpret_cast(data_buffer_ptr_) + valid_bits_idx_; for (int64_t i = 0; i < values_read; i++) { @@ -556,8 +557,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::BooleanType, BooleanTyp RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); for (int64_t i = 0; i < values_read; i++) { if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_); } @@ -577,11 +578,12 @@ Status ColumnReader::Impl::ReadNullableBatch(TypedColumnReader* rea RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { @@ -595,22 +597,22 @@ Status ColumnReader::Impl::ReadNullableBatch(TypedColumnReader* rea return Status::OK(); } -#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ - template <> \ - Status ColumnReader::Impl::ReadNullableBatch( \ - TypedColumnReader * reader, int16_t * def_levels, \ - int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \ - int64_t * values_read) { \ - auto data_ptr = reinterpret_cast(data_buffer_ptr_); \ - int64_t null_count; \ - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, \ - data_ptr + valid_bits_idx_, valid_bits_ptr_, valid_bits_idx_, levels_read, \ - values_read, &null_count)); \ - \ - valid_bits_idx_ += *values_read; \ - null_count_ += null_count; \ - \ - return Status::OK(); \ +#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ + template <> \ + Status ColumnReader::Impl::ReadNullableBatch( \ + TypedColumnReader * reader, int16_t * def_levels, \ + int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \ + int64_t * values_read) { \ + auto data_ptr = reinterpret_cast(data_buffer_ptr_); \ + int64_t null_count; \ + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), \ + def_levels, rep_levels, data_ptr + valid_bits_idx_, valid_bits_ptr_, \ + valid_bits_idx_, levels_read, values_read, &null_count)); \ + \ + valid_bits_idx_ += *values_read; \ + null_count_ += null_count; \ + \ + return Status::OK(); \ } NULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t) @@ -629,11 +631,12 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::TimestampType, Int96Type>( RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { data_ptr[valid_bits_idx_ + i] = impala_timestamp_to_nanoseconds(values[i]); @@ -653,11 +656,12 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::Date64Type, Int32Type>( RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { data_ptr[valid_bits_idx_ + i] = static_cast(values[i]) * 86400000; @@ -677,10 +681,11 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::BooleanType, BooleanType>( RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_ + i); } @@ -716,7 +721,8 @@ Status ColumnReader::Impl::InitDataBuffer<::arrow::BooleanType>(int batch_size) Status ColumnReader::Impl::InitValidBits(int batch_size) { valid_bits_idx_ = 0; if (descr_->max_definition_level() > 0) { - int valid_bits_size = ::arrow::BitUtil::CeilByte(batch_size + 1) / 8; + int valid_bits_size = + static_cast(::arrow::BitUtil::CeilByte(batch_size + 1)) / 8; valid_bits_buffer_ = std::make_shared(pool_); RETURN_NOT_OK(valid_bits_buffer_->Resize(valid_bits_size, false)); valid_bits_ptr_ = valid_bits_buffer_->mutable_data(); @@ -786,7 +792,8 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels, if (j == (list_depth - 1)) { RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); } else { - RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length())); + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); } if (((empty_def_level[j] - 1) == def_levels[i]) && (nullable[j])) { @@ -806,7 +813,8 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels, if (j == (list_depth - 1)) { RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); } else { - RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length())); + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); } } @@ -864,9 +872,9 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr RETURN_NOT_OK((ReadNullableBatch(reader, def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read, &levels_read, &values_read))); - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } - values_to_read -= values_read; + values_to_read -= static_cast(values_read); if (!column_reader_->HasNext()) { NextRowGroup(); } } @@ -925,9 +933,9 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>( RETURN_NOT_OK((ReadNullableBatch<::arrow::BooleanType, BooleanType>(reader, def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read, &levels_read, &values_read))); - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } - values_to_read -= values_read; + values_to_read -= static_cast(values_read); if (!column_reader_->HasNext()) { NextRowGroup(); } } @@ -991,7 +999,7 @@ Status ColumnReader::Impl::ReadByteArrayBatch( PARQUET_CATCH_NOT_OK( levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read, rep_levels + total_levels_read, values, &values_read)); - values_to_read -= levels_read; + values_to_read -= static_cast(levels_read); if (descr_->max_definition_level() == 0) { for (int64_t i = 0; i < levels_read; i++) { RETURN_NOT_OK( @@ -1012,7 +1020,7 @@ Status ColumnReader::Impl::ReadByteArrayBatch( values_idx++; } } - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } if (!column_reader_->HasNext()) { NextRowGroup(); } } @@ -1047,7 +1055,7 @@ Status ColumnReader::Impl::ReadFLBABatch( PARQUET_CATCH_NOT_OK( levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read, rep_levels + total_levels_read, values, &values_read)); - values_to_read -= levels_read; + values_to_read -= static_cast(levels_read); if (descr_->max_definition_level() == 0) { for (int64_t i = 0; i < levels_read; i++) { RETURN_NOT_OK(builder.Append(values[i].ptr)); @@ -1064,7 +1072,7 @@ Status ColumnReader::Impl::ReadFLBABatch( values_idx++; } } - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } if (!column_reader_->HasNext()) { NextRowGroup(); } } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 388250e..a5337cf 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -210,7 +210,7 @@ NullableArray( if (!valid_bytes[i]) { builder.AppendNull(); } else { - ::arrow::test::random_bytes(kBufferSize, seed + i, buffer); + ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); builder.Append(buffer, kBufferSize); } } @@ -240,7 +240,7 @@ NullableArray( if (!valid_bytes[i]) { builder.AppendNull(); } else { - ::arrow::test::random_bytes(kBufferSize, seed + i, buffer); + ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); builder.Append(buffer); } } @@ -294,10 +294,10 @@ Status MakeListArary(const std::shared_ptr& values, int64_t size, if (!(((i % 2) == 0) && ((i / 2) < null_count))) { // Non-null list (list with index 1 is always empty). ::arrow::BitUtil::SetBit(null_bitmap_ptr, i); - if (i != 1) { current_offset += length_per_entry; } + if (i != 1) { current_offset += static_cast(length_per_entry); } } } - offsets_ptr[size] = values->length(); + offsets_ptr[size] = static_cast(values->length()); auto value_field = std::make_shared<::arrow::Field>("item", values->type(), nullable_values); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index 631e16c..3344d1b 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -63,7 +63,7 @@ class LevelBuilder { Status VisitInline(const Array& array); Status Visit(const ::arrow::PrimitiveArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); values_type_ = array.type_id(); @@ -72,7 +72,7 @@ class LevelBuilder { } Status Visit(const ::arrow::BinaryArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); values_type_ = array.type_id(); @@ -81,7 +81,7 @@ class LevelBuilder { } Status Visit(const ListArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); offsets_.push_back(array.raw_value_offsets()); @@ -111,7 +111,7 @@ class LevelBuilder { std::shared_ptr* rep_levels, const Array** values_array) { // Work downwards to extract bitmaps and offsets min_offset_idx_ = 0; - max_offset_idx_ = array.length(); + max_offset_idx_ = static_cast(array.length()); RETURN_NOT_OK(VisitInline(array)); *num_values = max_offset_idx_ - min_offset_idx_; *values_offset = min_offset_idx_; @@ -143,7 +143,7 @@ class LevelBuilder { std::fill(def_levels_ptr, def_levels_ptr + array.length(), 1); } else { const uint8_t* valid_bits = array.null_bitmap_data(); - INIT_BITSET(valid_bits, array.offset()); + INIT_BITSET(valid_bits, static_cast(array.offset())); for (int i = 0; i < array.length(); i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { def_levels_ptr[i] = 1; @@ -396,7 +396,7 @@ Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter* writ RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(ParquetCType))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { buffer_ptr[i] = static_cast(data_ptr[i]); @@ -417,7 +417,7 @@ Status FileWriter::Impl::WriteNullableBatch( const int64_t* data_ptr) { RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { // Convert from milliseconds into days since the epoch @@ -439,7 +439,7 @@ Status FileWriter::Impl::WriteNullableBatch( const int32_t* data_ptr) { RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); if (type.unit() == TimeUnit::SECOND) { for (int i = 0; i < num_values; i++) { @@ -497,7 +497,7 @@ Status FileWriter::Impl::TypedWriteBatch( auto writer = reinterpret_cast*>(column_writer); int buffer_idx = 0; - int32_t offset = array->offset(); + int64_t offset = array->offset(); for (int i = 0; i < data->length(); i++) { if (!data->IsNull(i)) { buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/column-reader-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/column-reader-test.cc b/src/parquet/column/column-reader-test.cc index 6bf6651..a31c817 100644 --- a/src/parquet/column/column-reader-test.cc +++ b/src/parquet/column/column-reader-test.cc @@ -89,9 +89,9 @@ class TestPrimitiveReader : public ::testing::Test { // 1) batch_size < page_size (multiple ReadBatch from a single page) // 2) batch_size > page_size (BatchRead limits to a single page) do { - batch = reader->ReadBatch(batch_size, &dresult[0] + batch_actual, - &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read); - total_values_read += values_read; + batch = static_cast(reader->ReadBatch(batch_size, &dresult[0] + batch_actual, + &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read)); + total_values_read += static_cast(values_read); batch_actual += batch; batch_size = std::max(batch_size * 2, 4096); } while (batch > 0); @@ -102,7 +102,8 @@ class TestPrimitiveReader : public ::testing::Test { if (max_def_level_ > 0) { ASSERT_TRUE(vector_equal(def_levels_, dresult)); } if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); } // catch improper writes at EOS - batch_actual = reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read); + batch_actual = + static_cast(reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read)); ASSERT_EQ(0, batch_actual); ASSERT_EQ(0, values_read); } @@ -126,12 +127,13 @@ class TestPrimitiveReader : public ::testing::Test { // 1) batch_size < page_size (multiple ReadBatch from a single page) // 2) batch_size > page_size (BatchRead limits to a single page) do { - batch = reader->ReadBatchSpaced(batch_size, dresult.data() + levels_actual, - rresult.data() + levels_actual, vresult.data() + batch_actual, - valid_bits.data() + batch_actual, 0, &levels_read, &values_read, &null_count); - total_values_read += batch - null_count; + batch = static_cast(reader->ReadBatchSpaced(batch_size, + dresult.data() + levels_actual, rresult.data() + levels_actual, + vresult.data() + batch_actual, valid_bits.data() + batch_actual, 0, + &levels_read, &values_read, &null_count)); + total_values_read += batch - static_cast(null_count); batch_actual += batch; - levels_actual += levels_read; + levels_actual += static_cast(levels_read); batch_size = std::max(batch_size * 2, 4096); } while ((batch > 0) || (levels_read > 0)); @@ -146,8 +148,8 @@ class TestPrimitiveReader : public ::testing::Test { } if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); } // catch improper writes at EOS - batch_actual = reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr, - valid_bits.data(), 0, &levels_read, &values_read, &null_count); + batch_actual = static_cast(reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr, + valid_bits.data(), 0, &levels_read, &values_read, &null_count)); ASSERT_EQ(0, batch_actual); ASSERT_EQ(0, null_count); } @@ -262,8 +264,8 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) { // Read half a page reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); - vector sub_values( - values_.begin() + 2 * levels_per_page, values_.begin() + 2.5 * levels_per_page); + vector sub_values(values_.begin() + 2 * levels_per_page, + values_.begin() + static_cast(2.5 * static_cast(levels_per_page))); ASSERT_TRUE(vector_equal(sub_values, vresult)); // 2) skip_size == page_size (skip across two pages) @@ -273,7 +275,8 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) { reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); sub_values.clear(); - sub_values.insert(sub_values.end(), values_.begin() + 3.5 * levels_per_page, + sub_values.insert(sub_values.end(), + values_.begin() + static_cast(3.5 * static_cast(levels_per_page)), values_.begin() + 4 * levels_per_page); ASSERT_TRUE(vector_equal(sub_values, vresult)); @@ -285,8 +288,9 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) { reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); sub_values.clear(); - sub_values.insert( - sub_values.end(), values_.begin() + 4.5 * levels_per_page, values_.end()); + sub_values.insert(sub_values.end(), + values_.begin() + static_cast(4.5 * static_cast(levels_per_page)), + values_.end()); ASSERT_TRUE(vector_equal(sub_values, vresult)); values_.clear(); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/column-writer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/column-writer-test.cc b/src/parquet/column/column-writer-test.cc index dedb2c2..33eefac 100644 --- a/src/parquet/column/column-writer-test.cc +++ b/src/parquet/column/column-writer-test.cc @@ -93,8 +93,9 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { void ReadColumn(Compression::type compression = Compression::UNCOMPRESSED) { BuildReader(static_cast(this->values_out_.size()), compression); - reader_->ReadBatch(this->values_out_.size(), definition_levels_out_.data(), - repetition_levels_out_.data(), this->values_out_ptr_, &values_read_); + reader_->ReadBatch(static_cast(this->values_out_.size()), + definition_levels_out_.data(), repetition_levels_out_.data(), + this->values_out_ptr_, &values_read_); this->SyncValuesOut(); } @@ -133,7 +134,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { Compression::type compression, bool enable_dictionary, bool enable_statistics, int64_t num_rows) { std::vector valid_bits( - BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255); + BitUtil::RoundUpNumBytes(static_cast(this->values_.size())) + 1, 255); ColumnProperties column_properties( encoding, compression, enable_dictionary, enable_statistics); std::shared_ptr> writer = @@ -204,7 +205,8 @@ void TestPrimitiveWriter::ReadColumnFully(Compression::type compressio values_read_ = 0; while (values_read_ < total_values) { int64_t values_read_recently = 0; - reader_->ReadBatch(this->values_out_.size() - values_read_, + reader_->ReadBatch( + static_cast(this->values_out_.size()) - static_cast(values_read_), definition_levels_out_.data() + values_read_, repetition_levels_out_.data() + values_read_, this->values_out_ptr_ + values_read_, &values_read_recently); @@ -222,7 +224,8 @@ void TestPrimitiveWriter::ReadColumnFully(Compression::type compressio values_read_ = 0; while (values_read_ < total_values) { int64_t values_read_recently = 0; - reader_->ReadBatch(this->values_out_.size() - values_read_, + reader_->ReadBatch( + static_cast(this->values_out_.size()) - static_cast(values_read_), definition_levels_out_.data() + values_read_, repetition_levels_out_.data() + values_read_, this->values_out_ptr_ + values_read_, &values_read_recently); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/levels-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/levels-test.cc b/src/parquet/column/levels-test.cc index 1d29313..a6284a9 100644 --- a/src/parquet/column/levels-test.cc +++ b/src/parquet/column/levels-test.cc @@ -52,17 +52,18 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels, LevelEncoder encoder; int levels_count = 0; bytes.resize(2 * num_levels); - ASSERT_EQ(2 * num_levels, bytes.size()); + ASSERT_EQ(2 * num_levels, static_cast(bytes.size())); // encode levels if (encoding == Encoding::RLE) { // leave space to write the rle length value - encoder.Init( - encoding, max_level, num_levels, bytes.data() + sizeof(int32_t), bytes.size()); + encoder.Init(encoding, max_level, num_levels, bytes.data() + sizeof(int32_t), + static_cast(bytes.size())); levels_count = encoder.Encode(num_levels, input_levels); (reinterpret_cast(bytes.data()))[0] = encoder.len(); } else { - encoder.Init(encoding, max_level, num_levels, bytes.data(), bytes.size()); + encoder.Init( + encoding, max_level, num_levels, bytes.data(), static_cast(bytes.size())); levels_count = encoder.Encode(num_levels, input_levels); } ASSERT_EQ(num_levels, levels_count); @@ -73,10 +74,10 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level, LevelDecoder decoder; int levels_count = 0; std::vector output_levels; - int num_levels = input_levels.size(); + int num_levels = static_cast(input_levels.size()); output_levels.resize(num_levels); - ASSERT_EQ(num_levels, output_levels.size()); + ASSERT_EQ(num_levels, static_cast(output_levels.size())); // Decode levels and test with multiple decode calls decoder.SetData(encoding, max_level, num_levels, bytes.data()); @@ -112,13 +113,13 @@ void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level, std::vector output_levels; // Decode levels and test with multiple SetData calls - int setdata_count = bytes.size(); - int num_levels = input_levels.size() / setdata_count; + int setdata_count = static_cast(bytes.size()); + int num_levels = static_cast(input_levels.size()) / setdata_count; output_levels.resize(num_levels); // Try multiple SetData for (int ct = 0; ct < setdata_count; ct++) { int offset = ct * num_levels; - ASSERT_EQ(num_levels, output_levels.size()); + ASSERT_EQ(num_levels, static_cast(output_levels.size())); decoder.SetData(encoding, max_level, num_levels, bytes[ct].data()); levels_count = decoder.Decode(num_levels, output_levels.data()); ASSERT_EQ(num_levels, levels_count); @@ -149,7 +150,8 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) { int max_level = (1 << bit_width) - 1; // Generate levels GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); - EncodeLevels(encoding, max_level, input_levels.size(), input_levels.data(), bytes); + EncodeLevels(encoding, max_level, static_cast(input_levels.size()), + input_levels.data(), bytes); VerifyDecodingLevels(encoding, max_level, input_levels, bytes); input_levels.clear(); } @@ -166,7 +168,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) { std::vector> bytes; Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED}; GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); - int num_levels = input_levels.size(); + int num_levels = static_cast(input_levels.size()); int setdata_factor = 8; int split_level_size = num_levels / setdata_factor; bytes.resize(setdata_factor); @@ -200,7 +202,8 @@ TEST(TestLevelEncoder, MinimumBufferSize) { LevelEncoder::MaxBufferSize(Encoding::RLE, 1, kNumToEncode)); LevelEncoder encoder; - encoder.Init(Encoding::RLE, 1, kNumToEncode, output.data(), output.size()); + encoder.Init( + Encoding::RLE, 1, kNumToEncode, output.data(), static_cast(output.size())); int encode_count = encoder.Encode(kNumToEncode, levels.data()); ASSERT_EQ(kNumToEncode, encode_count); @@ -231,7 +234,8 @@ TEST(TestLevelEncoder, MinimumBufferSize2) { LevelEncoder::MaxBufferSize(Encoding::RLE, bit_width, kNumToEncode)); LevelEncoder encoder; - encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(), output.size()); + encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(), + static_cast(output.size())); int encode_count = encoder.Encode(kNumToEncode, levels.data()); ASSERT_EQ(kNumToEncode, encode_count); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/levels.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/levels.cc b/src/parquet/column/levels.cc index 716e08a..fd25420 100644 --- a/src/parquet/column/levels.cc +++ b/src/parquet/column/levels.cc @@ -36,7 +36,8 @@ void LevelEncoder::Init(Encoding::type encoding, int16_t max_level, break; } case Encoding::BIT_PACKED: { - int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8); + int num_bytes = + static_cast(BitUtil::Ceil(num_buffered_values * bit_width_, 8)); bit_packed_encoder_.reset(new BitWriter(data, num_bytes)); break; } @@ -58,7 +59,7 @@ int LevelEncoder::MaxBufferSize( break; } case Encoding::BIT_PACKED: { - num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8); + num_bytes = static_cast(BitUtil::Ceil(num_buffered_values * bit_width, 8)); break; } default: @@ -112,7 +113,8 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level, return sizeof(int32_t) + num_bytes; } case Encoding::BIT_PACKED: { - num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8); + num_bytes = + static_cast(BitUtil::Ceil(num_buffered_values * bit_width_, 8)); if (!bit_packed_decoder_) { bit_packed_decoder_.reset(new BitReader(data, num_bytes)); } else { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/page.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/page.h b/src/parquet/column/page.h index bca0ca4..a3813c5 100644 --- a/src/parquet/column/page.h +++ b/src/parquet/column/page.h @@ -52,7 +52,7 @@ class Page { const uint8_t* data() const { return buffer_->data(); } // @returns: the total size in bytes of the page's data buffer - int32_t size() const { return buffer_->size(); } + int32_t size() const { return static_cast(buffer_->size()); } private: std::shared_ptr buffer_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc index 71bb689..fe2de57 100644 --- a/src/parquet/column/reader.cc +++ b/src/parquet/column/reader.cc @@ -169,7 +169,8 @@ bool TypedColumnReader::ReadNewPage() { throw ParquetException("Unknown encoding type."); } } - current_decoder_->SetData(num_buffered_values_, buffer, data_size); + current_decoder_->SetData( + num_buffered_values_, buffer, static_cast(data_size)); return true; } else { // We don't know what this page type is. We're allowed to skip non-data @@ -185,12 +186,12 @@ bool TypedColumnReader::ReadNewPage() { int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_definition_level() == 0) { return 0; } - return definition_level_decoder_.Decode(batch_size, levels); + return definition_level_decoder_.Decode(static_cast(batch_size), levels); } int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_repetition_level() == 0) { return 0; } - return repetition_level_decoder_.Decode(batch_size, levels); + return repetition_level_decoder_.Decode(static_cast(batch_size), levels); } // ---------------------------------------------------------------------- @@ -225,13 +226,13 @@ std::shared_ptr ColumnReader::Make( // ---------------------------------------------------------------------- // Instantiate templated classes -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h index e0c6585..80084b2 100644 --- a/src/parquet/column/reader.h +++ b/src/parquet/column/reader.h @@ -205,7 +205,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader { template inline int64_t TypedColumnReader::ReadValues(int64_t batch_size, T* out) { - int64_t num_decoded = current_decoder_->Decode(out, batch_size); + int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); return num_decoded; } @@ -213,7 +213,7 @@ template inline int64_t TypedColumnReader::ReadValuesSpaced(int64_t batch_size, T* out, int null_count, uint8_t* valid_bits, int64_t valid_bits_offset) { return current_decoder_->DecodeSpaced( - out, batch_size, null_count, valid_bits, valid_bits_offset); + out, static_cast(batch_size), null_count, valid_bits, valid_bits_offset); } template @@ -257,7 +257,7 @@ inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_ *values_read = ReadValues(values_to_read, values); int64_t total_values = std::max(num_def_levels, *values_read); - num_decoded_values_ += total_values; + num_decoded_values_ += static_cast(total_values); return total_values; } @@ -265,8 +265,8 @@ inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels, int16_t max_definition_level, int64_t* values_read, int64_t* null_count, uint8_t* valid_bits, int64_t valid_bits_offset) { - int byte_offset = valid_bits_offset / 8; - int bit_offset = valid_bits_offset % 8; + int byte_offset = static_cast(valid_bits_offset) / 8; + int bit_offset = static_cast(valid_bits_offset) % 8; uint8_t bitset = valid_bits[byte_offset]; for (int i = 0; i < num_def_levels; ++i) { @@ -338,8 +338,8 @@ inline int64_t TypedColumnReader::ReadBatchSpaced(int batch_size, int16_t max_definition_level = descr_->max_definition_level(); DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level, values_read, &null_count, valid_bits, valid_bits_offset); - total_values = ReadValuesSpaced( - *values_read, values, null_count, valid_bits, valid_bits_offset); + total_values = ReadValuesSpaced(*values_read, values, static_cast(null_count), + valid_bits, valid_bits_offset); } *levels_read = num_def_levels; *null_count_out = null_count; @@ -354,7 +354,7 @@ inline int64_t TypedColumnReader::ReadBatchSpaced(int batch_size, *levels_read = total_values; } - num_decoded_values_ += *levels_read; + num_decoded_values_ += static_cast(*levels_read); return total_values; } @@ -383,10 +383,10 @@ inline int64_t TypedColumnReader::Skip(int64_t num_rows_to_skip) { do { batch_size = std::min(batch_size, rows_to_skip); - values_read = - ReadBatch(batch_size, reinterpret_cast(def_levels->mutable_data()), - reinterpret_cast(rep_levels->mutable_data()), - reinterpret_cast(vals->mutable_data()), &values_read); + values_read = ReadBatch(static_cast(batch_size), + reinterpret_cast(def_levels->mutable_data()), + reinterpret_cast(rep_levels->mutable_data()), + reinterpret_cast(vals->mutable_data()), &values_read); rows_to_skip -= values_read; } while (values_read > 0 && rows_to_skip > 0); } @@ -403,14 +403,14 @@ typedef TypedColumnReader DoubleReader; typedef TypedColumnReader ByteArrayReader; typedef TypedColumnReader FixedLenByteArrayReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/scanner.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h index 914f2ad..a9b83c1 100644 --- a/src/parquet/column/scanner.h +++ b/src/parquet/column/scanner.h @@ -103,8 +103,9 @@ class PARQUET_EXPORT TypedScanner : public Scanner { bool NextLevels(int16_t* def_level, int16_t* rep_level) { if (level_offset_ == levels_buffered_) { - levels_buffered_ = typed_reader_->ReadBatch(batch_size_, def_levels_.data(), - rep_levels_.data(), values_, &values_buffered_); + levels_buffered_ = + static_cast(typed_reader_->ReadBatch(static_cast(batch_size_), + def_levels_.data(), rep_levels_.data(), values_, &values_buffered_)); value_offset_ = 0; level_offset_ = 0; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/statistics-test.cc b/src/parquet/column/statistics-test.cc index d631d98..e656f81 100644 --- a/src/parquet/column/statistics-test.cc +++ b/src/parquet/column/statistics-test.cc @@ -72,7 +72,7 @@ class TestRowGroupStatistics : public PrimitiveTypedTest { TypedStats statistics3(this->schema_.Column(0)); std::vector valid_bits( - BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255); + BitUtil::RoundUpNumBytes(static_cast(this->values_.size())) + 1, 255); statistics3.UpdateSpaced( this->values_ptr_, valid_bits.data(), 0, this->values_.size(), 0); std::string encoded_min_spaced = statistics3.EncodeMin(); @@ -145,8 +145,8 @@ class TestRowGroupStatistics : public PrimitiveTypedTest { // simulate the case when data comes from multiple buffers, // in which case special care is necessary for FLBA/ByteArray types for (int i = 0; i < 2; i++) { - int batch_num_values = i ? num_values - num_values / 2 : num_values / 2; - int batch_null_count = i ? null_count : 0; + int64_t batch_num_values = i ? num_values - num_values / 2 : num_values / 2; + int64_t batch_null_count = i ? null_count : 0; DCHECK(null_count <= num_values); // avoid too much headache std::vector definition_levels(batch_null_count, 0); definition_levels.insert( http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/statistics.cc b/src/parquet/column/statistics.cc index e67a3d3..961a2af 100644 --- a/src/parquet/column/statistics.cc +++ b/src/parquet/column/statistics.cc @@ -120,7 +120,7 @@ void TypedRowGroupStatistics::UpdateSpaced(const T* values, if (num_not_null == 0) return; Compare compare(descr_); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); // Find first valid entry and use that for min/max // As (num_not_null != 0) there must be one int64_t length = num_null + num_not_null; @@ -216,7 +216,8 @@ void TypedRowGroupStatistics::PlainEncode(const T& src, std::string* dst) template void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { PlainDecoder decoder(descr()); - decoder.SetData(1, reinterpret_cast(src.c_str()), src.size()); + decoder.SetData( + 1, reinterpret_cast(src.c_str()), static_cast(src.size())); decoder.Decode(dst, 1); } @@ -227,17 +228,17 @@ void TypedRowGroupStatistics::PlainEncode(const T& src, std::stri template <> void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { - dst->len = src.size(); + dst->len = static_cast(src.size()); dst->ptr = reinterpret_cast(src.c_str()); } -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/statistics.h b/src/parquet/column/statistics.h index 6f12eb9..c6a2487 100644 --- a/src/parquet/column/statistics.h +++ b/src/parquet/column/statistics.h @@ -216,14 +216,14 @@ typedef TypedRowGroupStatistics FLBAStatistics; #pragma GCC diagnostic ignored "-Wattributes" #endif -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/test-specialization.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/test-specialization.h b/src/parquet/column/test-specialization.h index 27781cc..07767c0 100644 --- a/src/parquet/column/test-specialization.h +++ b/src/parquet/column/test-specialization.h @@ -115,7 +115,11 @@ void PrimitiveTypedTest::SyncValuesOut() {} template <> void PrimitiveTypedTest::SyncValuesOut() { - std::copy(bool_buffer_out_.begin(), bool_buffer_out_.end(), values_out_.begin()); + std::vector::const_iterator source_iterator = bool_buffer_out_.begin(); + std::vector::iterator destination_iterator = values_out_.begin(); + while (source_iterator != bool_buffer_out_.end()) { + *destination_iterator++ = *source_iterator++ != 0; + } } template @@ -143,7 +147,7 @@ void PrimitiveTypedTest::GenerateData(int64_t num_values) { def_levels_.resize(num_values); values_.resize(num_values); - InitValues(num_values, values_, buffer_); + InitValues(static_cast(num_values), values_, buffer_); values_ptr_ = values_.data(); std::fill(def_levels_.begin(), def_levels_.end(), 1); @@ -154,7 +158,7 @@ void PrimitiveTypedTest::GenerateData(int64_t num_values) { def_levels_.resize(num_values); values_.resize(num_values); - InitValues(num_values, values_, buffer_); + InitValues(static_cast(num_values), values_, buffer_); bool_buffer_.resize(num_values); std::copy(values_.begin(), values_.end(), bool_buffer_.begin()); values_ptr_ = reinterpret_cast(bool_buffer_.data()); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/test-util.h b/src/parquet/column/test-util.h index 97e936a..c133734 100644 --- a/src/parquet/column/test-util.h +++ b/src/parquet/column/test-util.h @@ -131,7 +131,7 @@ class DataPageBuilder { void AppendValues(const ColumnDescriptor* d, const vector& values, Encoding::type encoding = Encoding::PLAIN) { PlainEncoder encoder(d); - encoder.Put(&values[0], values.size()); + encoder.Put(&values[0], static_cast(values.size())); std::shared_ptr values_sink = encoder.FlushValues(); sink_->Write(values_sink->data(), values_sink->size()); @@ -174,10 +174,10 @@ class DataPageBuilder { // RLE-encoded bytes have to be preceded in the stream by their absolute // size. LevelEncoder encoder; - encoder.Init( - encoding, max_level, levels.size(), encode_buffer.data(), encode_buffer.size()); + encoder.Init(encoding, max_level, static_cast(levels.size()), + encode_buffer.data(), static_cast(encode_buffer.size())); - encoder.Encode(levels.size(), levels.data()); + encoder.Encode(static_cast(levels.size()), levels.data()); int32_t rle_bytes = encoder.len(); sink_->Write(reinterpret_cast(&rle_bytes), sizeof(int32_t)); @@ -192,7 +192,7 @@ void DataPageBuilder::AppendValues( ParquetException::NYI("only plain encoding currently implemented"); } PlainEncoder encoder(d); - encoder.Put(values, values.size()); + encoder.Put(values, static_cast(values.size())); std::shared_ptr buffer = encoder.FlushValues(); sink_->Write(buffer->data(), buffer->size()); @@ -243,7 +243,7 @@ class DictionaryPageBuilder { ~DictionaryPageBuilder() { pool_.FreeAll(); } shared_ptr AppendValues(const vector& values) { - int num_values = values.size(); + int num_values = static_cast(values.size()); // Dictionary encoding encoder_->Put(values.data(), num_values); num_dict_values_ = encoder_->num_entries(); @@ -291,7 +291,7 @@ static shared_ptr MakeDictPage(const ColumnDescriptor* d, Encoding::type encoding, vector>& rle_indices) { InMemoryOutputStream page_stream; test::DictionaryPageBuilder page_builder(d); - int num_pages = values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); int value_start = 0; for (int i = 0; i < num_pages; i++) { @@ -313,7 +313,7 @@ static void PaginateDict(const ColumnDescriptor* d, int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, int num_levels_per_page, const vector& values_per_page, vector>& pages, Encoding::type encoding = Encoding::RLE_DICTIONARY) { - int num_pages = values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); vector> rle_indices; shared_ptr dict_page = MakeDictPage(d, values, values_per_page, encoding, rle_indices); @@ -332,7 +332,7 @@ static void PaginateDict(const ColumnDescriptor* d, rep_level_end = (i + 1) * num_levels_per_page; } shared_ptr data_page = MakeDataPage(d, {}, values_per_page[i], - encoding, rle_indices[i]->data(), rle_indices[i]->size(), + encoding, rle_indices[i]->data(), static_cast(rle_indices[i]->size()), slice(def_levels, def_level_start, def_level_end), max_def_level, slice(rep_levels, rep_level_start, rep_level_end), max_rep_level); pages.push_back(data_page); @@ -346,7 +346,7 @@ static void PaginatePlain(const ColumnDescriptor* d, int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, int num_levels_per_page, const vector& values_per_page, vector>& pages, Encoding::type encoding = Encoding::PLAIN) { - int num_pages = values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); int def_level_start = 0; int def_level_end = 0; int rep_level_start = 0; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc index bd23b06..59f9999 100644 --- a/src/parquet/column/writer.cc +++ b/src/parquet/column/writer.cc @@ -87,19 +87,19 @@ void ColumnWriter::WriteRepetitionLevels(int64_t num_levels, const int16_t* leve int64_t ColumnWriter::RleEncodeLevels( const Buffer& src_buffer, ResizableBuffer* dest_buffer, int16_t max_level) { // TODO: This only works with due to some RLE specifics - int64_t rle_size = - LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, num_buffered_values_) + - sizeof(int32_t); + int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast(num_buffered_values_)) + + sizeof(int32_t); // Use Arrow::Buffer::shrink_to_fit = false // underlying buffer only keeps growing. Resize to a smaller size does not reallocate. PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false)); - level_encoder_.Init(Encoding::RLE, max_level, num_buffered_values_, + level_encoder_.Init(Encoding::RLE, max_level, static_cast(num_buffered_values_), dest_buffer->mutable_data() + sizeof(int32_t), - dest_buffer->size() - sizeof(int32_t)); - int encoded = level_encoder_.Encode( - num_buffered_values_, reinterpret_cast(src_buffer.data())); + static_cast(dest_buffer->size()) - sizeof(int32_t)); + int encoded = level_encoder_.Encode(static_cast(num_buffered_values_), + reinterpret_cast(src_buffer.data())); DCHECK_EQ(encoded, num_buffered_values_); reinterpret_cast(dest_buffer->mutable_data())[0] = level_encoder_.len(); int64_t encoded_size = level_encoder_.len() + sizeof(int32_t); @@ -154,12 +154,13 @@ void ColumnWriter::AddDataPage() { std::shared_ptr compressed_data_copy; PARQUET_THROW_NOT_OK(compressed_data->Copy( 0, compressed_data->size(), allocator_, &compressed_data_copy)); - CompressedDataPage page(compressed_data_copy, num_buffered_values_, encoding_, - Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); + CompressedDataPage page(compressed_data_copy, + static_cast(num_buffered_values_), encoding_, Encoding::RLE, + Encoding::RLE, uncompressed_size, page_stats); data_pages_.push_back(std::move(page)); } else { // Eagerly write pages - CompressedDataPage page(compressed_data, num_buffered_values_, encoding_, - Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); + CompressedDataPage page(compressed_data, static_cast(num_buffered_values_), + encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); WriteDataPage(page); } @@ -170,8 +171,7 @@ void ColumnWriter::AddDataPage() { } void ColumnWriter::WriteDataPage(const CompressedDataPage& page) { - int64_t bytes_written = pager_->WriteDataPage(page); - total_bytes_written_ += bytes_written; + total_bytes_written_ += pager_->WriteDataPage(page); } int64_t ColumnWriter::Close() { @@ -361,7 +361,7 @@ inline int64_t TypedColumnWriter::WriteMiniBatch(int64_t num_values, WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row - num_rows_ += num_values; + num_rows_ += static_cast(num_values); } if (num_rows_ > expected_rows_) { @@ -422,7 +422,7 @@ inline int64_t TypedColumnWriter::WriteMiniBatchSpaced(int64_t num_values WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row - num_rows_ += num_values; + num_rows_ += static_cast(num_values); } if (num_rows_ > expected_rows_) { @@ -461,7 +461,7 @@ void TypedColumnWriter::WriteBatch(int64_t num_values, const int16_t* def // of values, the chunking will ensure the AddDataPage() is called at a reasonable // pagesize limit int64_t write_batch_size = properties_->write_batch_size(); - int num_batches = num_values / write_batch_size; + int num_batches = static_cast(num_values / write_batch_size); int64_t num_remaining = num_values % write_batch_size; int64_t value_offset = 0; for (int round = 0; round < num_batches; round++) { @@ -486,7 +486,7 @@ void TypedColumnWriter::WriteBatchSpaced(int64_t num_values, // of values, the chunking will ensure the AddDataPage() is called at a reasonable // pagesize limit int64_t write_batch_size = properties_->write_batch_size(); - int num_batches = num_values / write_batch_size; + int num_batches = static_cast(num_values / write_batch_size); int64_t num_remaining = num_values % write_batch_size; int64_t num_spaced_written = 0; int64_t values_offset = 0; @@ -506,22 +506,23 @@ void TypedColumnWriter::WriteBatchSpaced(int64_t num_values, template void TypedColumnWriter::WriteValues(int64_t num_values, const T* values) { - current_encoder_->Put(values, num_values); + current_encoder_->Put(values, static_cast(num_values)); } template void TypedColumnWriter::WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) { - current_encoder_->PutSpaced(values, num_values, valid_bits, valid_bits_offset); + current_encoder_->PutSpaced( + values, static_cast(num_values), valid_bits, valid_bits_offset); } -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/writer.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/writer.h b/src/parquet/column/writer.h index 305c35e..c7f9ea0 100644 --- a/src/parquet/column/writer.h +++ b/src/parquet/column/writer.h @@ -118,17 +118,17 @@ class PARQUET_EXPORT ColumnWriter { // values. For repeated or optional values, there may be fewer data values // than levels, and this tells you how many encoded levels there are in that // case. - int num_buffered_values_; + int64_t num_buffered_values_; // The total number of stored values. For repeated or optional values, this // number may be lower than num_buffered_values_. - int num_buffered_encoded_values_; + int64_t num_buffered_encoded_values_; // Total number of rows written with this ColumnWriter int num_rows_; // Records the total number of bytes written by the serializer - int total_bytes_written_; + int64_t total_bytes_written_; // Flag to check if the Writer has been closed bool closed_; @@ -212,14 +212,14 @@ typedef TypedColumnWriter DoubleWriter; typedef TypedColumnWriter ByteArrayWriter; typedef TypedColumnWriter FixedLenByteArrayWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/compression-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/compression-test.cc b/src/parquet/compression-test.cc index f4fd3ba..feaf9e3 100644 --- a/src/parquet/compression-test.cc +++ b/src/parquet/compression-test.cc @@ -34,13 +34,13 @@ void CheckCodecRoundtrip(const vector& data) { T c1; T c2; - int max_compressed_len = c1.MaxCompressedLen(data.size(), &data[0]); + int max_compressed_len = static_cast(c1.MaxCompressedLen(data.size(), &data[0])); std::vector compressed(max_compressed_len); std::vector decompressed(data.size()); // compress with c1 - int actual_size = - c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]); + int actual_size = static_cast( + c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0])); compressed.resize(actual_size); // decompress with c2 @@ -49,8 +49,8 @@ void CheckCodecRoundtrip(const vector& data) { ASSERT_TRUE(test::vector_equal(data, decompressed)); // compress with c2 - int actual_size2 = - c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]); + int actual_size2 = static_cast( + c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0])); ASSERT_EQ(actual_size2, actual_size); // decompress with c1 http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/compression.cc ---------------------------------------------------------------------- diff --git a/src/parquet/compression.cc b/src/parquet/compression.cc index 7d219fe..dc6b93d 100644 --- a/src/parquet/compression.cc +++ b/src/parquet/compression.cc @@ -152,9 +152,9 @@ class GZipCodec::GZipCodecImpl { // from the beginning again. while (ret != Z_STREAM_END) { stream_.next_in = const_cast(reinterpret_cast(input)); - stream_.avail_in = input_length; + stream_.avail_in = static_cast(input_length); stream_.next_out = reinterpret_cast(output); - stream_.avail_out = output_length; + stream_.avail_out = static_cast(output_length); // We know the output size. In this case, we can use Z_FINISH // which is more efficient. @@ -188,9 +188,9 @@ class GZipCodec::GZipCodecImpl { uint8_t* output) { if (!compressor_initialized_) { InitCompressor(); } stream_.next_in = const_cast(reinterpret_cast(input)); - stream_.avail_in = input_length; + stream_.avail_in = static_cast(input_length); stream_.next_out = reinterpret_cast(output); - stream_.avail_out = output_length; + stream_.avail_out = static_cast(output_length); int64_t ret = 0; if ((ret = deflate(&stream_, Z_FINISH)) != Z_STREAM_END) { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding-internal.h ---------------------------------------------------------------------- diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h index 7e90254..7e78123 100644 --- a/src/parquet/encoding-internal.h +++ b/src/parquet/encoding-internal.h @@ -196,7 +196,8 @@ class PlainEncoder : public Encoder { bits_available_(kInMemoryDefaultCapacity * 8), bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)), values_sink_(new InMemoryOutputStream(pool)) { - bit_writer_.reset(new BitWriter(bits_buffer_->mutable_data(), bits_buffer_->size())); + bit_writer_.reset(new BitWriter( + bits_buffer_->mutable_data(), static_cast(bits_buffer_->size()))); } int64_t EstimatedDataEncodedSize() override { @@ -208,7 +209,7 @@ class PlainEncoder : public Encoder { bit_writer_->Flush(); values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); bit_writer_->Clear(); - bits_available_ = bits_buffer_->size() * 8; + bits_available_ = static_cast(bits_buffer_->size()) * 8; } std::shared_ptr buffer = values_sink_->GetBuffer(); @@ -236,7 +237,7 @@ class PlainEncoder : public Encoder { \ int bits_remaining = num_values - bit_offset; \ while (bit_offset < num_values) { \ - bits_available_ = bits_buffer_->size() * 8; \ + bits_available_ = static_cast(bits_buffer_->size()) * 8; \ \ int bits_to_write = std::min(bits_available_, bits_remaining); \ for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { \ @@ -463,7 +464,9 @@ class DictEncoder : public Encoder { // reserve // an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't be used // but not reserving them would cause the encoder to fail. - return 1 + RleEncoder::MaxBufferSize(bit_width(), buffered_indices_.size()) + + return 1 + + RleEncoder::MaxBufferSize( + bit_width(), static_cast(buffered_indices_.size())) + RleEncoder::MinBufferSize(bit_width()); } @@ -493,7 +496,8 @@ class DictEncoder : public Encoder { std::shared_ptr FlushValues() override { std::shared_ptr buffer = AllocateBuffer(this->allocator_, EstimatedDataEncodedSize()); - int result_size = WriteIndices(buffer->mutable_data(), EstimatedDataEncodedSize()); + int result_size = WriteIndices( + buffer->mutable_data(), static_cast(EstimatedDataEncodedSize())); ClearIndices(); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); return buffer; @@ -507,7 +511,7 @@ class DictEncoder : public Encoder { void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, int64_t valid_bits_offset) override { - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int32_t i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); } READ_NEXT_BITSET(valid_bits); @@ -521,7 +525,7 @@ class DictEncoder : public Encoder { ChunkedAllocator* mem_pool() { return pool_; } /// The number of entries in the dictionary. - int num_entries() const { return uniques_.size(); } + int num_entries() const { return static_cast(uniques_.size()); } private: ::arrow::MemoryPool* allocator_; @@ -607,7 +611,7 @@ inline void DictEncoder::Put(const typename DType::c_type& v) { if (index == HASH_SLOT_EMPTY) { // Not in the hash table, so we insert it now - index = uniques_.size(); + index = static_cast(uniques_.size()); hash_slots_[j] = index; AddDictKey(v); @@ -808,7 +812,7 @@ class DeltaBitPackDecoder : public Decoder { int64_t delta; if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException(); delta += min_delta_; - last_value_ += delta; + last_value_ += static_cast(delta); buffer[i] = last_value_; --values_current_mini_block_; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc index fbf6812..2e78036 100644 --- a/src/parquet/encoding-test.cc +++ b/src/parquet/encoding-test.cc @@ -42,7 +42,7 @@ namespace test { TEST(VectorBooleanTest, TestEncodeDecode) { // PARQUET-454 int nvalues = 10000; - int nbytes = BitUtil::Ceil(nvalues, 8); + int nbytes = static_cast(BitUtil::Ceil(nvalues, 8)); // seed the prng so failure is deterministic vector draws = flip_coins_seed(nvalues, 0.5, 0); @@ -58,7 +58,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) { vector decode_buffer(nbytes); const uint8_t* decode_data = &decode_buffer[0]; - decoder.SetData(nvalues, encode_buffer->data(), encode_buffer->size()); + decoder.SetData( + nvalues, encode_buffer->data(), static_cast(encode_buffer->size())); int values_decoded = decoder.Decode(&decode_buffer[0], nvalues); ASSERT_EQ(nvalues, values_decoded); @@ -218,7 +219,8 @@ class TestPlainEncoding : public TestEncodingBase { encoder.Put(draws_, num_values_); encode_buffer_ = encoder.FlushValues(); - decoder.SetData(num_values_, encode_buffer_->data(), encode_buffer_->size()); + decoder.SetData( + num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); int values_decoded = decoder.Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); VerifyResults(decode_buf_, draws_, num_values_); @@ -263,13 +265,13 @@ class TestDictionaryEncoding : public TestEncodingBase { ASSERT_TRUE(indices_from_spaced->Equals(*indices)); PlainDecoder dict_decoder(descr_.get()); - dict_decoder.SetData( - encoder.num_entries(), dict_buffer_->data(), dict_buffer_->size()); + dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(), + static_cast(dict_buffer_->size())); DictionaryDecoder decoder(descr_.get()); decoder.SetDict(&dict_decoder); - decoder.SetData(num_values_, indices->data(), indices->size()); + decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); int values_decoded = decoder.Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); @@ -279,7 +281,7 @@ class TestDictionaryEncoding : public TestEncodingBase { VerifyResults(decode_buf_, draws_, num_values_); // Also test spaced decoding - decoder.SetData(num_values_, indices->data(), indices->size()); + decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); values_decoded = decoder.DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0); ASSERT_EQ(num_values_, values_decoded); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encoding.h b/src/parquet/encoding.h index 69fc40e..47f2b75 100644 --- a/src/parquet/encoding.h +++ b/src/parquet/encoding.h @@ -52,7 +52,7 @@ class Encoder { PoolBuffer buffer(pool_); buffer.Resize(num_values * sizeof(T)); int32_t num_valid_values = 0; - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); T* data = reinterpret_cast(buffer.mutable_data()); for (int32_t i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {