parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject parquet-cpp git commit: PARQUET-933: Account for API changes in ARROW-728
Date Thu, 30 Mar 2017 21:32:07 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 1580d56d4 -> a48bfaa7e


PARQUET-933: Account for API changes in ARROW-728

Requires https://github.com/apache/arrow/pull/457

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #280 from wesm/PARQUET-933 and squashes the following commits:

5a4fdeb [Wes McKinney] Use EP_CXX_FLAGS
d23acce [Wes McKinney] Upgrade to gbenchmark 1.1.0
8cb1191 [Wes McKinney] Fix benchmarks
29c48c5 [Wes McKinney] Update Arrow version
e1af3f0 [Wes McKinney] Account for API changes in ARROW-728


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/a48bfaa7
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/a48bfaa7
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/a48bfaa7

Branch: refs/heads/master
Commit: a48bfaa7e9c649b120eb3c88e0234695042d5d4e
Parents: 1580d56
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Thu Mar 30 17:32:00 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Mar 30 17:32:00 2017 -0400

----------------------------------------------------------------------
 cmake_modules/ThirdpartyToolchain.cmake         |  9 ++++--
 .../arrow/arrow-reader-writer-benchmark.cc      |  2 +-
 src/parquet/arrow/arrow-reader-writer-test.cc   |  6 ++--
 src/parquet/arrow/reader.cc                     |  3 +-
 src/parquet/arrow/test-util.h                   |  4 +--
 src/parquet/column/column-io-benchmark.cc       | 32 ++++++++++----------
 src/parquet/column/level-benchmark.cc           | 22 +++++++-------
 src/parquet/encoding-benchmark.cc               | 24 +++++++--------
 8 files changed, 53 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index b4340d0..ecd9bca 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -16,13 +16,13 @@
 # under the License.
 
 set(GTEST_VERSION "1.7.0")
-set(GBENCHMARK_VERSION "1.0.0")
+set(GBENCHMARK_VERSION "1.1.0")
 set(SNAPPY_VERSION "1.1.3")
 set(THRIFT_VERSION "0.10.0")
 
 # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
 set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
-set(ARROW_VERSION "c7947dc2d08a0a2295016d34db201cc38a38360c")
+set(ARROW_VERSION "15b874e47e3975c5240290ec7ed105bf8d1b56bc")
 
 # find boost headers and libs
 # Find shared Boost libraries.
@@ -311,6 +311,7 @@ endif()
 if(PARQUET_BUILD_BENCHMARKS)
   add_custom_target(runbenchmark ctest -L benchmark)
 
+
   if("$ENV{GBENCHMARK_HOME}" STREQUAL "")
     set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
     set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
@@ -319,7 +320,11 @@ if(PARQUET_BUILD_BENCHMARKS)
     set(GBENCHMARK_CMAKE_ARGS
           "-DCMAKE_BUILD_TYPE=Release"
           "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
+          "-DBENCHMARK_ENABLE_TESTING=OFF"
           "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}")
+    if (APPLE)
+      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
+    endif()
     if (CMAKE_VERSION VERSION_GREATER "3.2")
       # BUILD_BYPRODUCTS is a 3.2+ feature
       ExternalProject_Add(gbenchmark_ep

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/arrow-reader-writer-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index c4a4777..7d8c107 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -98,7 +98,7 @@ std::shared_ptr<::arrow::Table> TableFromVector(
       std::vector<std::shared_ptr<::arrow::Field>>({field}));
   auto column = std::make_shared<::arrow::Column>(field, array);
   return std::make_shared<::arrow::Table>(
-      "table", schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
+      schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
 }
 
 template <bool nullable, typename ParquetType>

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 4598cab..3b232f9 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -773,7 +773,7 @@ void MakeDoubleTable(int num_columns, int num_rows, std::shared_ptr<Table>* out)
     fields[i] = column->field();
   }
   auto schema = std::make_shared<::arrow::Schema>(fields);
-  *out = std::make_shared<Table>("schema", schema, columns);
+  *out = std::make_shared<Table>(schema, columns);
 }
 
 void DoTableRoundtrip(const std::shared_ptr<Table>& table, int num_threads,
@@ -810,7 +810,7 @@ TEST(TestArrowReadWrite, MultithreadedRead) {
   std::shared_ptr<Table> result;
   DoTableRoundtrip(table, num_threads, {}, &result);
 
-  ASSERT_TRUE(table->Equals(result));
+  ASSERT_TRUE(table->Equals(*result));
 }
 
 TEST(TestArrowReadWrite, ReadColumnSubset) {
@@ -833,7 +833,7 @@ TEST(TestArrowReadWrite, ReadColumnSubset) {
   }
 
   auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields);
-  auto expected = std::make_shared<Table>("schema", ex_schema, ex_columns);
+  Table expected(ex_schema, ex_columns);
   ASSERT_TRUE(result->Equals(expected));
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index d1bf38e..53600b4 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -210,7 +210,6 @@ Status FileReader::Impl::ReadTable(
     const std::vector<int>& indices, std::shared_ptr<Table>* table) {
   auto descr = reader_->metadata()->schema();
 
-  const std::string& name = descr->name();
   std::shared_ptr<::arrow::Schema> schema;
   RETURN_NOT_OK(FromParquetSchema(descr, indices, &schema));
 
@@ -233,7 +232,7 @@ Status FileReader::Impl::ReadTable(
     RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc));
   }
 
-  *table = std::make_shared<Table>(name, schema, columns);
+  *table = std::make_shared<Table>(schema, columns);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 1cf1376..2cfc60a 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -272,7 +272,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(
   std::vector<std::shared_ptr<::arrow::Column>> columns({column});
   std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
   auto schema = std::make_shared<::arrow::Schema>(fields);
-  return std::make_shared<::arrow::Table>("table", schema, columns);
+  return std::make_shared<::arrow::Table>(schema, columns);
 }
 
 template <typename T>
@@ -300,7 +300,7 @@ void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) {
 
   std::shared_ptr<Array> expected_array;
   EXPECT_OK(builder.Finish(&expected_array));
-  EXPECT_TRUE(result->Equals(expected_array));
+  EXPECT_TRUE(result->Equals(*expected_array));
 }
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/column/column-io-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-io-benchmark.cc b/src/parquet/column/column-io-benchmark.cc
index fb491b9..24afab2 100644
--- a/src/parquet/column/column-io-benchmark.cc
+++ b/src/parquet/column/column-io-benchmark.cc
@@ -45,22 +45,22 @@ std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
 }
 
 void SetBytesProcessed(::benchmark::State& state, Repetition::type repetition) {
-  int64_t bytes_processed = state.iterations() * state.range_x() * sizeof(int64_t);
+  int64_t bytes_processed = state.iterations() * state.range(0) * sizeof(int64_t);
   if (repetition != Repetition::REQUIRED) {
-    bytes_processed += state.iterations() * state.range_x() * sizeof(int16_t);
+    bytes_processed += state.iterations() * state.range(0) * sizeof(int16_t);
   }
   if (repetition == Repetition::REPEATED) {
-    bytes_processed += state.iterations() * state.range_x() * sizeof(int16_t);
+    bytes_processed += state.iterations() * state.range(0) * sizeof(int16_t);
   }
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t));
 }
 
 template <Repetition::type repetition>
 static void BM_WriteInt64Column(::benchmark::State& state) {
   format::ColumnChunk thrift_metadata;
-  std::vector<int64_t> values(state.range_x(), 128);
-  std::vector<int16_t> definition_levels(state.range_x(), 1);
-  std::vector<int16_t> repetition_levels(state.range_x(), 0);
+  std::vector<int64_t> values(state.range(0), 128);
+  std::vector<int16_t> definition_levels(state.range(0), 1);
+  std::vector<int16_t> repetition_levels(state.range(0), 0);
   std::shared_ptr<ColumnDescriptor> schema = Int64Schema(repetition);
   std::shared_ptr<WriterProperties> properties = default_writer_properties();
   auto metadata = ColumnChunkMetaDataBuilder::Make(
@@ -69,7 +69,7 @@ static void BM_WriteInt64Column(::benchmark::State& state) {
   while (state.KeepRunning()) {
     InMemoryOutputStream stream;
     std::unique_ptr<Int64Writer> writer = BuildWriter(
-        state.range_x(), &stream, metadata.get(), schema.get(), properties.get());
+        state.range(0), &stream, metadata.get(), schema.get(), properties.get());
     writer->WriteBatch(
         values.size(), definition_levels.data(), repetition_levels.data(), values.data());
     writer->Close();
@@ -94,9 +94,9 @@ std::unique_ptr<Int64Reader> BuildReader(
 template <Repetition::type repetition>
 static void BM_ReadInt64Column(::benchmark::State& state) {
   format::ColumnChunk thrift_metadata;
-  std::vector<int64_t> values(state.range_x(), 128);
-  std::vector<int16_t> definition_levels(state.range_x(), 1);
-  std::vector<int16_t> repetition_levels(state.range_x(), 0);
+  std::vector<int64_t> values(state.range(0), 128);
+  std::vector<int16_t> definition_levels(state.range(0), 1);
+  std::vector<int16_t> repetition_levels(state.range(0), 0);
   std::shared_ptr<ColumnDescriptor> schema = Int64Schema(repetition);
   std::shared_ptr<WriterProperties> properties = default_writer_properties();
   auto metadata = ColumnChunkMetaDataBuilder::Make(
@@ -104,17 +104,17 @@ static void BM_ReadInt64Column(::benchmark::State& state) {
 
   InMemoryOutputStream stream;
   std::unique_ptr<Int64Writer> writer = BuildWriter(
-      state.range_x(), &stream, metadata.get(), schema.get(), properties.get());
+      state.range(0), &stream, metadata.get(), schema.get(), properties.get());
   writer->WriteBatch(
       values.size(), definition_levels.data(), repetition_levels.data(), values.data());
   writer->Close();
 
   std::shared_ptr<Buffer> src = stream.GetBuffer();
-  std::vector<int64_t> values_out(state.range_y());
-  std::vector<int16_t> definition_levels_out(state.range_y());
-  std::vector<int16_t> repetition_levels_out(state.range_y());
+  std::vector<int64_t> values_out(state.range(1));
+  std::vector<int16_t> definition_levels_out(state.range(1));
+  std::vector<int16_t> repetition_levels_out(state.range(1));
   while (state.KeepRunning()) {
-    std::unique_ptr<Int64Reader> reader = BuildReader(src, state.range_y(), schema.get());
+    std::unique_ptr<Int64Reader> reader = BuildReader(src, state.range(1), schema.get());
     int64_t values_read = 0;
     for (size_t i = 0; i < values.size(); i += values_read) {
       reader->ReadBatch(values_out.size(), definition_levels_out.data(),

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/column/level-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/level-benchmark.cc b/src/parquet/column/level-benchmark.cc
index 036108f..34c7218 100644
--- a/src/parquet/column/level-benchmark.cc
+++ b/src/parquet/column/level-benchmark.cc
@@ -25,10 +25,10 @@ namespace parquet {
 namespace benchmark {
 
 static void BM_RleEncoding(::benchmark::State& state) {
-  std::vector<int16_t> levels(state.range_x(), 0);
+  std::vector<int16_t> levels(state.range(0), 0);
   int64_t n = 0;
-  std::generate(levels.begin(), levels.end(),
-      [&state, &n] { return (n++ % state.range_y()) == 0; });
+  std::generate(
+      levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; });
   int16_t max_level = 1;
   int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size());
   auto buffer_rle = std::make_shared<PoolBuffer>();
@@ -40,18 +40,18 @@ static void BM_RleEncoding(::benchmark::State& state) {
         buffer_rle->mutable_data(), buffer_rle->size());
     level_encoder.Encode(levels.size(), levels.data());
   }
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t));
-  state.SetItemsProcessed(state.iterations() * state.range_x());
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
 }
 
 BENCHMARK(BM_RleEncoding)->RangePair(1024, 65536, 1, 16);
 
 static void BM_RleDecoding(::benchmark::State& state) {
   LevelEncoder level_encoder;
-  std::vector<int16_t> levels(state.range_x(), 0);
+  std::vector<int16_t> levels(state.range(0), 0);
   int64_t n = 0;
-  std::generate(levels.begin(), levels.end(),
-      [&state, &n] { return (n++ % state.range_y()) == 0; });
+  std::generate(
+      levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; });
   int16_t max_level = 1;
   int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size());
   auto buffer_rle = std::make_shared<PoolBuffer>();
@@ -64,11 +64,11 @@ static void BM_RleDecoding(::benchmark::State& state) {
   while (state.KeepRunning()) {
     LevelDecoder level_decoder;
     level_decoder.SetData(Encoding::RLE, max_level, levels.size(), buffer_rle->data());
-    level_decoder.Decode(state.range_x(), levels.data());
+    level_decoder.Decode(state.range(0), levels.data());
   }
 
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t));
-  state.SetItemsProcessed(state.iterations() * state.range_x());
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
 }
 
 BENCHMARK(BM_RleDecoding)->RangePair(1024, 65536, 1, 16);

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/encoding-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc
index 8ea684a..1e93ba7 100644
--- a/src/parquet/encoding-benchmark.cc
+++ b/src/parquet/encoding-benchmark.cc
@@ -38,21 +38,21 @@ std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
 }
 
 static void BM_PlainEncodingBoolean(::benchmark::State& state) {
-  std::vector<bool> values(state.range_x(), 64);
+  std::vector<bool> values(state.range(0), 64);
   PlainEncoder<BooleanType> encoder(nullptr);
 
   while (state.KeepRunning()) {
     encoder.Put(values, values.size());
     encoder.FlushValues();
   }
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
 }
 
 BENCHMARK(BM_PlainEncodingBoolean)->Range(1024, 65536);
 
 static void BM_PlainDecodingBoolean(::benchmark::State& state) {
-  std::vector<bool> values(state.range_x(), 64);
-  bool* output = new bool[state.range_x()];
+  std::vector<bool> values(state.range(0), 64);
+  bool* output = new bool[state.range(0)];
   PlainEncoder<BooleanType> encoder(nullptr);
   encoder.Put(values, values.size());
   std::shared_ptr<Buffer> buf = encoder.FlushValues();
@@ -63,27 +63,27 @@ static void BM_PlainDecodingBoolean(::benchmark::State& state) {
     decoder.Decode(output, values.size());
   }
 
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
   delete[] output;
 }
 
 BENCHMARK(BM_PlainDecodingBoolean)->Range(1024, 65536);
 
 static void BM_PlainEncodingInt64(::benchmark::State& state) {
-  std::vector<int64_t> values(state.range_x(), 64);
+  std::vector<int64_t> values(state.range(0), 64);
   PlainEncoder<Int64Type> encoder(nullptr);
 
   while (state.KeepRunning()) {
     encoder.Put(values.data(), values.size());
     encoder.FlushValues();
   }
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t));
 }
 
 BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536);
 
 static void BM_PlainDecodingInt64(::benchmark::State& state) {
-  std::vector<int64_t> values(state.range_x(), 64);
+  std::vector<int64_t> values(state.range(0), 64);
   PlainEncoder<Int64Type> encoder(nullptr);
   encoder.Put(values.data(), values.size());
   std::shared_ptr<Buffer> buf = encoder.FlushValues();
@@ -93,7 +93,7 @@ static void BM_PlainDecodingInt64(::benchmark::State& state) {
     decoder.SetData(values.size(), buf->data(), buf->size());
     decoder.Decode(values.data(), values.size());
   }
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t));
 }
 
 BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536);
@@ -133,14 +133,14 @@ static void DecodeDict(
     decoder.Decode(values.data(), num_values);
   }
 
-  state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(T));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T));
 }
 
 static void BM_DictDecodingInt64_repeats(::benchmark::State& state) {
   typedef Int64Type Type;
   typedef typename Type::c_type T;
 
-  std::vector<T> values(state.range_x(), 64);
+  std::vector<T> values(state.range(0), 64);
   DecodeDict<Type>(values, state);
 }
 
@@ -150,7 +150,7 @@ static void BM_DictDecodingInt64_literals(::benchmark::State& state) {
   typedef Int64Type Type;
   typedef typename Type::c_type T;
 
-  std::vector<T> values(state.range_x());
+  std::vector<T> values(state.range(0));
   for (size_t i = 0; i < values.size(); ++i) {
     values[i] = i;
   }


Mime
View raw message