parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [parquet-cpp] branch master updated: PARQUET-1164: [C++] Account for API changes in ARROW-1808
Date Wed, 22 Nov 2017 14:32:25 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-cpp.git


The following commit(s) were added to refs/heads/master by this push:
     new 9b39fbd  PARQUET-1164: [C++] Account for API changes in ARROW-1808
9b39fbd is described below

commit 9b39fbd7ce67fdaf20ec7cfcd842a31013641168
Author: Wes McKinney <wes.mckinney@twosigma.com>
AuthorDate: Wed Nov 22 09:32:21 2017 -0500

    PARQUET-1164: [C++] Account for API changes in ARROW-1808
    
    Author: Wes McKinney <wes.mckinney@twosigma.com>
    
    Closes #418 from wesm/PARQUET-1164 and squashes the following commits:
    
    ca18e60 [Wes McKinney] Bump Arrow version to include ARROW-1808
    d580b4f [Wes McKinney] Refactor to account for ARROW-1808
---
 cmake_modules/ThirdpartyToolchain.cmake            |  2 +-
 src/parquet/arrow/arrow-reader-writer-benchmark.cc |  9 ++--
 src/parquet/arrow/arrow-reader-writer-test.cc      | 55 ++++++++++------------
 src/parquet/arrow/arrow-schema-test.cc             |  4 +-
 src/parquet/arrow/reader.cc                        |  4 +-
 src/parquet/arrow/test-util.h                      |  2 +-
 src/parquet/file/reader.cc                         | 18 +++----
 7 files changed, 44 insertions(+), 50 deletions(-)

diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index fe1d499..53630e6 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -366,7 +366,7 @@ if (NOT ARROW_FOUND)
     -DARROW_BUILD_TESTS=OFF)
 
   if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
-    set(ARROW_VERSION "f2806fa518583907a129b2ecb0b7ec8758b69e17")
+    set(ARROW_VERSION "fc4e2c36d2c56a8bd5d1ab17eeb406826924d3e5")
   else()
     set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
   endif()
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index a54fb5d..edeef1e 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -112,11 +112,9 @@ std::shared_ptr<::arrow::Table> TableFromVector(
   EXIT_NOT_OK(builder.Finish(&array));
 
   auto field = ::arrow::field("column", type, nullable);
-  auto schema = std::make_shared<::arrow::Schema>(
-      std::vector<std::shared_ptr<::arrow::Field>>({field}));
+  auto schema = ::arrow::schema({field});
   auto column = std::make_shared<::arrow::Column>(field, array);
-  return std::make_shared<::arrow::Table>(
-      schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
+  return ::arrow::Table::Make(schema, {column});
 }
 
 template <>
@@ -139,8 +137,7 @@ std::shared_ptr<::arrow::Table> TableFromVector<BooleanType>(const std::vector<b
   auto schema = std::make_shared<::arrow::Schema>(
       std::vector<std::shared_ptr<::arrow::Field>>({field}));
   auto column = std::make_shared<::arrow::Column>(field, array);
-  return std::make_shared<::arrow::Table>(
-      schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
+  return ::arrow::Table::Make(schema, {column});
 }
 
 template <bool nullable, typename ParquetType>
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 0e0831e..a8d3824 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -23,8 +23,8 @@
 
 #include "gtest/gtest.h"
 
-#include <sstream>
 #include <arrow/compute/api.h>
+#include <sstream>
 
 #include "parquet/api/reader.h"
 #include "parquet/api/writer.h"
@@ -1145,7 +1145,7 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>* out, bool nanos_as_micros =
       std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1),
       std::make_shared<Column>("f2", a2), std::make_shared<Column>("f3", a3),
       std::make_shared<Column>("f4", a4), std::make_shared<Column>("f5", a5)};
-  *out = std::make_shared<::arrow::Table>(schema, columns);
+  *out = Table::Make(schema, columns);
 }
 
 TEST(TestArrowReadWrite, DateTimeTypes) {
@@ -1199,31 +1199,28 @@ TEST(TestArrowReadWrite, CoerceTimestamps) {
   auto s1 = std::shared_ptr<::arrow::Schema>(
       new ::arrow::Schema({field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us),
                            field("f_ns", t_ns)}));
-  auto input = std::make_shared<::arrow::Table>(
-      s1, ColumnVector({std::make_shared<Column>("f_s", a_s),
-                        std::make_shared<Column>("f_ms", a_ms),
-                        std::make_shared<Column>("f_us", a_us),
-                        std::make_shared<Column>("f_ns", a_ns)}));
+  auto input = Table::Make(
+      s1,
+      {std::make_shared<Column>("f_s", a_s), std::make_shared<Column>("f_ms", a_ms),
+       std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_ns)});
 
   // Result when coercing to milliseconds
   auto s2 = std::shared_ptr<::arrow::Schema>(
       new ::arrow::Schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms),
                            field("f_ns", t_ms)}));
-  auto ex_milli_result = std::make_shared<::arrow::Table>(
-      s2, ColumnVector({std::make_shared<Column>("f_s", a_ms),
-                        std::make_shared<Column>("f_ms", a_ms),
-                        std::make_shared<Column>("f_us", a_ms),
-                        std::make_shared<Column>("f_ns", a_ms)}));
+  auto ex_milli_result = Table::Make(
+      s2,
+      {std::make_shared<Column>("f_s", a_ms), std::make_shared<Column>("f_ms", a_ms),
+       std::make_shared<Column>("f_us", a_ms), std::make_shared<Column>("f_ns", a_ms)});
 
   // Result when coercing to microseconds
   auto s3 = std::shared_ptr<::arrow::Schema>(
       new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us),
                            field("f_ns", t_us)}));
-  auto ex_micro_result = std::make_shared<::arrow::Table>(
-      s3, ColumnVector({std::make_shared<Column>("f_s", a_us),
-                        std::make_shared<Column>("f_ms", a_us),
-                        std::make_shared<Column>("f_us", a_us),
-                        std::make_shared<Column>("f_ns", a_us)}));
+  auto ex_micro_result = Table::Make(
+      s3,
+      {std::make_shared<Column>("f_s", a_us), std::make_shared<Column>("f_ms", a_us),
+       std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_us)});
 
   std::shared_ptr<Table> milli_result;
   DoSimpleRoundtrip(
@@ -1276,10 +1273,10 @@ TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) {
   auto c3 = std::make_shared<Column>("f_us", a_us);
   auto c4 = std::make_shared<Column>("f_ns", a_ns);
 
-  auto t1 = std::make_shared<::arrow::Table>(s1, ColumnVector({c1}));
-  auto t2 = std::make_shared<::arrow::Table>(s2, ColumnVector({c2}));
-  auto t3 = std::make_shared<::arrow::Table>(s3, ColumnVector({c3}));
-  auto t4 = std::make_shared<::arrow::Table>(s4, ColumnVector({c4}));
+  auto t1 = Table::Make(s1, {c1});
+  auto t2 = Table::Make(s2, {c2});
+  auto t3 = Table::Make(s3, {c3});
+  auto t4 = Table::Make(s4, {c4});
 
   auto sink = std::make_shared<InMemoryOutputStream>();
 
@@ -1327,7 +1324,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
 
   std::vector<std::shared_ptr<::arrow::Column>> columns = {
       std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1)};
-  auto table = std::make_shared<::arrow::Table>(schema, columns);
+  auto table = Table::Make(schema, columns);
 
   // Expected schema and values
   auto e0 = field("f0", ::arrow::date32());
@@ -1341,7 +1338,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
 
   std::vector<std::shared_ptr<::arrow::Column>> ex_columns = {
       std::make_shared<Column>("f0", x0), std::make_shared<Column>("f1", x1)};
-  auto ex_table = std::make_shared<::arrow::Table>(ex_schema, ex_columns);
+  auto ex_table = Table::Make(ex_schema, ex_columns);
 
   std::shared_ptr<Table> result;
   DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result);
@@ -1372,7 +1369,7 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
     fields[i] = column->field();
   }
   auto schema = std::make_shared<::arrow::Schema>(fields);
-  *out = std::make_shared<Table>(schema, columns);
+  *out = Table::Make(schema, columns);
 }
 
 TEST(TestArrowReadWrite, MultithreadedRead) {
@@ -1459,9 +1456,9 @@ TEST(TestArrowReadWrite, ReadColumnSubset) {
     ex_fields.push_back(table->column(i)->field());
   }
 
-  auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields);
-  Table expected(ex_schema, ex_columns);
-  AssertTablesEqual(expected, *result);
+  auto ex_schema = ::arrow::schema(ex_fields);
+  auto expected = Table::Make(ex_schema, ex_columns);
+  AssertTablesEqual(*expected, *result);
 }
 
 void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
@@ -1501,7 +1498,7 @@ void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
   auto f1 = ::arrow::field("a", ::arrow::list(::arrow::int8()));
   auto schema = ::arrow::schema({f1});
   std::vector<std::shared_ptr<Array>> arrays = {list_array};
-  *out = std::make_shared<Table>(schema, arrays);
+  *out = Table::Make(schema, arrays);
 }
 
 TEST(TestArrowReadWrite, ListLargeRecords) {
@@ -1544,7 +1541,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
   auto chunked_col =
       std::make_shared<::arrow::Column>(table->schema()->field(0), chunked);
   std::vector<std::shared_ptr<::arrow::Column>> columns = {chunked_col};
-  auto chunked_table = std::make_shared<Table>(table->schema(), columns);
+  auto chunked_table = Table::Make(table->schema(), columns);
 
   ASSERT_TRUE(table->Equals(*chunked_table));
 }
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 7ed9ad8..129eccf 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test {
     for (int i = 0; i < expected_schema->num_fields(); ++i) {
       auto lhs = result_schema_->field(i);
       auto rhs = expected_schema->field(i);
-      EXPECT_TRUE(lhs->Equals(rhs))
-          << i << " " << lhs->ToString() << " != " << rhs->ToString();
+      EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString()
+                                    << " != " << rhs->ToString();
     }
   }
 
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 3ca49cb..e13a094 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -431,7 +431,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
     RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc));
   }
 
-  *out = std::make_shared<Table>(schema, columns);
+  *out = Table::Make(schema, columns);
   return Status::OK();
 }
 
@@ -466,7 +466,7 @@ Status FileReader::Impl::ReadTable(const std::vector<int>& indices,
     RETURN_NOT_OK(ParallelFor(nthreads, num_fields, ReadColumnFunc));
   }
 
-  *table = std::make_shared<Table>(schema, columns);
+  *table = Table::Make(schema, columns);
   return Status::OK();
 }
 
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 8611a30..7264324 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -414,7 +414,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& va
   std::vector<std::shared_ptr<::arrow::Column>> columns({column});
   std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
   auto schema = std::make_shared<::arrow::Schema>(fields);
-  return std::make_shared<::arrow::Table>(schema, columns);
+  return ::arrow::Table::Make(schema, columns);
 }
 
 template <typename T>
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index 4ec48a4..9b9bde9 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -45,9 +45,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
     : contents_(std::move(contents)) {}
 
 std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
-  DCHECK(i < metadata()->num_columns())
-      << "The RowGroup only has " << metadata()->num_columns()
-      << "columns, requested column: " << i;
+  DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+                                        << metadata()->num_columns()
+                                        << "columns, requested column: " << i;
   const ColumnDescriptor* descr = metadata()->schema()->Column(i);
 
   std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
@@ -57,9 +57,9 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
 }
 
 std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
-  DCHECK(i < metadata()->num_columns())
-      << "The RowGroup only has " << metadata()->num_columns()
-      << "columns, requested column: " << i;
+  DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+                                        << metadata()->num_columns()
+                                        << "columns, requested column: " << i;
   return contents_->GetColumnPageReader(i);
 }
 
@@ -127,9 +127,9 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
 }
 
 std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
-  DCHECK(i < metadata()->num_row_groups())
-      << "The file only has " << metadata()->num_row_groups()
-      << "row groups, requested reader for: " << i;
+  DCHECK(i < metadata()->num_row_groups()) << "The file only has "
+                                           << metadata()->num_row_groups()
+                                           << "row groups, requested reader for: " << i;
   return contents_->GetRowGroup(i);
 }
 

-- 
To stop receiving notification emails like this one, please contact
commits@parquet.apache.org.

Mime
View raw message