parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject parquet-cpp git commit: PARQUET-830: Add parquet::arrow::OpenFile with additional properties and metadata args
Date Thu, 12 Jan 2017 07:57:44 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master c0870f7ab -> 0804faf4f


PARQUET-830: Add parquet::arrow::OpenFile with additional properties and metadata args

I also slightly refactored the test suite to use OpenFile rather than using the `ParquetFileReader`
ctor directly (`OpenFile` wasn't being used in the test suite).

Needed for ARROW-471

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #219 from wesm/PARQUET-830 and squashes the following commits:

bd17192 [Wes McKinney] Add parquet::arrow::OpenFile with additional properties and metadata arguments


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/0804faf4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/0804faf4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/0804faf4

Branch: refs/heads/master
Commit: 0804faf4fc8ecb448643d107f9cfe60021460546
Parents: c0870f7
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Thu Jan 12 08:57:25 2017 +0100
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Thu Jan 12 08:57:25 2017 +0100

----------------------------------------------------------------------
 src/parquet/arrow/arrow-reader-writer-test.cc | 45 ++++++++++++++--------
 src/parquet/arrow/reader.cc                   | 14 +++++--
 src/parquet/arrow/reader.h                    |  7 ++++
 3 files changed, 46 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0804faf4/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index a329480..2089abd 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -203,16 +203,17 @@ class TestParquetIO : public ::testing::Test {
     return ParquetFileWriter::Open(sink_, schema);
   }
 
-  std::unique_ptr<ParquetFileReader> ReaderFromSink() {
+  void ReaderFromSink(std::unique_ptr<FileReader>* out) {
     std::shared_ptr<Buffer> buffer = sink_->GetBuffer();
-    return ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
+    ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+            ::arrow::default_memory_pool(), ::parquet::default_reader_properties(),
+            nullptr, out));
   }
 
   void ReadSingleColumnFile(
-      std::unique_ptr<ParquetFileReader> file_reader, std::shared_ptr<Array>* out) {
-    FileReader reader(::arrow::default_memory_pool(), std::move(file_reader));
+      std::unique_ptr<FileReader> file_reader, std::shared_ptr<Array>* out) {
     std::unique_ptr<FlatColumnReader> column_reader;
-    ASSERT_OK_NO_THROW(reader.GetFlatColumn(0, &column_reader));
+    ASSERT_OK_NO_THROW(file_reader->GetFlatColumn(0, &column_reader));
     ASSERT_NE(nullptr, column_reader.get());
 
     ASSERT_OK(column_reader->NextBatch(SMALL_SIZE, out));
@@ -221,20 +222,24 @@ class TestParquetIO : public ::testing::Test {
 
   void ReadAndCheckSingleColumnFile(::arrow::Array* values) {
     std::shared_ptr<::arrow::Array> out;
-    ReadSingleColumnFile(ReaderFromSink(), &out);
+
+    std::unique_ptr<FileReader> reader;
+    ReaderFromSink(&reader);
+    ReadSingleColumnFile(std::move(reader), &out);
     ASSERT_TRUE(values->Equals(out));
   }
 
   void ReadTableFromFile(
-      std::unique_ptr<ParquetFileReader> file_reader, std::shared_ptr<Table>* out) {
-    FileReader reader(::arrow::default_memory_pool(), std::move(file_reader));
-    ASSERT_OK_NO_THROW(reader.ReadFlatTable(out));
+      std::unique_ptr<FileReader> reader, std::shared_ptr<Table>* out) {
+    ASSERT_OK_NO_THROW(reader->ReadFlatTable(out));
     ASSERT_NE(nullptr, out->get());
   }
 
   void ReadAndCheckSingleColumnTable(const std::shared_ptr<::arrow::Array>& values) {
     std::shared_ptr<::arrow::Table> out;
-    ReadTableFromFile(ReaderFromSink(), &out);
+    std::unique_ptr<FileReader> reader;
+    ReaderFromSink(&reader);
+    ReadTableFromFile(std::move(reader), &out);
     ASSERT_EQ(1, out->num_columns());
     ASSERT_EQ(values->length(), out->num_rows());
 
@@ -287,7 +292,9 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
       this->sink_, values->length(), default_writer_properties()));
 
   std::shared_ptr<Table> out;
-  this->ReadTableFromFile(this->ReaderFromSink(), &out);
+  std::unique_ptr<FileReader> reader;
+  this->ReaderFromSink(&reader);
+  this->ReadTableFromFile(std::move(reader), &out);
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(100, out->num_rows());
 
@@ -368,7 +375,9 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWriteArrowIO) {
 
   auto source = std::make_shared<BufferReader>(pbuffer);
   std::shared_ptr<::arrow::Table> out;
-  this->ReadTableFromFile(ParquetFileReader::Open(std::move(source)), &out);
+  std::unique_ptr<FileReader> reader;
+  ASSERT_OK_NO_THROW(OpenFile(source, ::arrow::default_memory_pool(), &reader));
+  this->ReadTableFromFile(std::move(reader), &out);
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(values->length(), out->num_rows());
 
@@ -530,7 +539,9 @@ TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) {
       this->sink_, values->length(), default_writer_properties()));
 
   std::shared_ptr<Table> out;
-  this->ReadTableFromFile(this->ReaderFromSink(), &out);
+  std::unique_ptr<FileReader> reader;
+  this->ReaderFromSink(&reader);
+  this->ReadTableFromFile(std::move(reader), &out);
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(100, out->num_rows());
 
@@ -558,7 +569,7 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
   typedef typename c_type_trait<TestType>::ArrowCType T;
 
   void MakeTestFile(std::vector<T>& values, int num_chunks,
-      std::unique_ptr<ParquetFileReader>* file_reader) {
+      std::unique_ptr<FileReader>* reader) {
     std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED);
     std::unique_ptr<ParquetFileWriter> file_writer = this->MakeWriter(schema);
     size_t chunk_size = values.size() / num_chunks;
@@ -578,12 +589,12 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
       row_group_writer->Close();
     }
     file_writer->Close();
-    *file_reader = this->ReaderFromSink();
+    this->ReaderFromSink(reader);
   }
 
   void CheckSingleColumnRequiredTableRead(int num_chunks) {
     std::vector<T> values(SMALL_SIZE, test_traits<TestType>::value);
-    std::unique_ptr<ParquetFileReader> file_reader;
+    std::unique_ptr<FileReader> file_reader;
     ASSERT_NO_THROW(MakeTestFile(values, num_chunks, &file_reader));
 
     std::shared_ptr<Table> out;
@@ -598,7 +609,7 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
 
   void CheckSingleColumnRequiredRead(int num_chunks) {
     std::vector<T> values(SMALL_SIZE, test_traits<TestType>::value);
-    std::unique_ptr<ParquetFileReader> file_reader;
+    std::unique_ptr<FileReader> file_reader;
     ASSERT_NO_THROW(MakeTestFile(values, num_chunks, &file_reader));
 
     std::shared_ptr<Array> out;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0804faf4/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index cecbc42..7e14f56 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -190,14 +190,22 @@ FileReader::~FileReader() {}
 
 // Static ctor
 Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
-    MemoryPool* allocator, std::unique_ptr<FileReader>* reader) {
-  // TODO(wesm): reader properties
+    MemoryPool* allocator, const ReaderProperties& props,
+    const std::shared_ptr<FileMetaData>& metadata, std::unique_ptr<FileReader>* reader) {
+  std::unique_ptr<RandomAccessSource> io_wrapper(new ArrowInputFile(file));
   std::unique_ptr<ParquetReader> pq_reader;
-  PARQUET_CATCH_NOT_OK(pq_reader = ParquetReader::Open(file));
+  PARQUET_CATCH_NOT_OK(
+      pq_reader = ParquetReader::Open(std::move(io_wrapper), props, metadata));
   reader->reset(new FileReader(allocator, std::move(pq_reader)));
   return Status::OK();
 }
 
+Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
+    MemoryPool* allocator, std::unique_ptr<FileReader>* reader) {
+  return OpenFile(file, allocator, ::parquet::default_reader_properties(),
+      nullptr, reader);
+}
+
 Status FileReader::GetFlatColumn(int i, std::unique_ptr<FlatColumnReader>* out) {
   return impl_->GetFlatColumn(i, out);
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0804faf4/src/parquet/arrow/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.h b/src/parquet/arrow/reader.h
index 2602824..518ae4b 100644
--- a/src/parquet/arrow/reader.h
+++ b/src/parquet/arrow/reader.h
@@ -139,6 +139,13 @@ class PARQUET_EXPORT FlatColumnReader {
 
 // Helper function to create a file reader from an implementation of an Arrow
 // readable file
+//
+// metadata : separately-computed file metadata, can be nullptr
+PARQUET_EXPORT
+::arrow::Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
+    ::arrow::MemoryPool* allocator, const ReaderProperties& properties,
+    const std::shared_ptr<FileMetaData>& metadata, std::unique_ptr<FileReader>* reader);
+
 PARQUET_EXPORT
 ::arrow::Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
     ::arrow::MemoryPool* allocator, std::unique_ptr<FileReader>* reader);


Mime
View raw message