Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 96993200B86 for ; Sun, 18 Sep 2016 22:02:09 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 9523D160AD8; Sun, 18 Sep 2016 20:02:09 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 39918160A8C for ; Sun, 18 Sep 2016 22:02:08 +0200 (CEST) Received: (qmail 58097 invoked by uid 500); 18 Sep 2016 20:02:07 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 58088 invoked by uid 99); 18 Sep 2016 20:02:07 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 18 Sep 2016 20:02:07 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4E106E0551; Sun, 18 Sep 2016 20:02:07 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Date: Sun, 18 Sep 2016 20:02:07 -0000 Message-Id: <1d2fd122a0ff4caaa098122f53fe9471@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] arrow git commit: ARROW-280: [C++] Refactor IPC / memory map IO to use common arrow_io interfaces. Create arrow_ipc leaf library archived-at: Sun, 18 Sep 2016 20:02:09 -0000 Repository: arrow Updated Branches: refs/heads/master 17e90e1d8 -> 559b86522 http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/ipc/symbols.map ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/symbols.map b/cpp/src/arrow/ipc/symbols.map new file mode 100644 index 0000000..b4ad98c --- /dev/null +++ b/cpp/src/arrow/ipc/symbols.map @@ -0,0 +1,18 @@ +{ + # Symbols marked as 'local' are not exported by the DSO and thus may not + # be used by client applications. + local: + # devtoolset / static-libstdc++ symbols + __cxa_*; + + extern "C++" { + # boost + boost::*; + + # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically + # links c++11 symbols into binaries so that the result may be executed on + # a system with an older libstdc++ which doesn't include the necessary + # c++11 symbols. + std::*; + }; +}; http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index e7dbb84..f6582fc 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -34,31 +34,6 @@ namespace arrow { namespace ipc { -class MemoryMapFixture { - public: - void TearDown() { - for (auto path : tmp_files_) { - std::remove(path.c_str()); - } - } - - void CreateFile(const std::string path, int64_t size) { - FILE* file = fopen(path.c_str(), "w"); - if (file != nullptr) { tmp_files_.push_back(path); } - ftruncate(fileno(file), size); - fclose(file); - } - - Status InitMemoryMap( - int64_t size, const std::string& path, std::shared_ptr* mmap) { - CreateFile(path, size); - return MemoryMappedSource::Open(path, MemorySource::READ_WRITE, mmap); - } - - private: - std::vector tmp_files_; -}; - Status MakeRandomInt32Array( int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr* array) { std::shared_ptr data; http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/ipc/util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/util.h b/cpp/src/arrow/ipc/util.h new file mode 100644 index 0000000..3f4001b --- /dev/null +++ b/cpp/src/arrow/ipc/util.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_IPC_UTIL_H +#define ARROW_IPC_UTIL_H + +#include + +#include "arrow/array.h" +#include "arrow/io/interfaces.h" +#include "arrow/util/status.h" + +namespace arrow { +namespace ipc { + +// A helper class to tracks the size of allocations +class MockOutputStream : public io::OutputStream { + public: + MockOutputStream() : extent_bytes_written_(0) {} + + Status Close() override { return Status::OK(); } + + Status Write(const uint8_t* data, int64_t nbytes) override { + extent_bytes_written_ += nbytes; + return Status::OK(); + } + + Status Tell(int64_t* position) override { + *position = extent_bytes_written_; + return Status::OK(); + } + + int64_t GetExtentBytesWritten() const { return extent_bytes_written_; } + + private: + int64_t extent_bytes_written_; +}; + +} // namespace ipc +} // namespace arrow + +#endif // ARROW_IPC_UTIL_H http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/CMakeLists.txt b/cpp/src/arrow/parquet/CMakeLists.txt index f2a90b7..c400e14 100644 --- a/cpp/src/arrow/parquet/CMakeLists.txt +++ b/cpp/src/arrow/parquet/CMakeLists.txt @@ -27,6 +27,7 @@ set(PARQUET_SRCS set(PARQUET_LIBS arrow_shared + arrow_io parquet_shared ) http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/io.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/io.cc b/cpp/src/arrow/parquet/io.cc index b6fdd67..a50d753 100644 --- a/cpp/src/arrow/parquet/io.cc +++ b/cpp/src/arrow/parquet/io.cc @@ -27,7 +27,7 @@ #include "arrow/util/status.h" // To assist with readability -using ArrowROFile = arrow::io::RandomAccessFile; +using ArrowROFile = arrow::io::ReadableFileInterface; namespace arrow { namespace parquet { @@ -58,7 +58,7 @@ void ParquetAllocator::Free(uint8_t* buffer, int64_t size) { ParquetReadSource::ParquetReadSource(ParquetAllocator* allocator) : file_(nullptr), allocator_(allocator) {} -Status ParquetReadSource::Open(const std::shared_ptr& file) { +Status ParquetReadSource::Open(const std::shared_ptr& file) { int64_t file_size; RETURN_NOT_OK(file->GetSize(&file_size)); http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/io.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/io.h b/cpp/src/arrow/parquet/io.h index 1c59695..1734863 100644 --- a/cpp/src/arrow/parquet/io.h +++ b/cpp/src/arrow/parquet/io.h @@ -62,7 +62,7 @@ class ARROW_EXPORT ParquetReadSource : public ::parquet::RandomAccessSource { explicit ParquetReadSource(ParquetAllocator* allocator); // We need to ask for the file size on opening the file, and this can fail - Status Open(const std::shared_ptr& file); + Status Open(const std::shared_ptr& file); void Close() override; int64_t Tell() const override; @@ -72,7 +72,7 @@ class ARROW_EXPORT ParquetReadSource : public ::parquet::RandomAccessSource { private: // An Arrow readable file of some kind - std::shared_ptr file_; + std::shared_ptr file_; // The allocator is required for creating managed buffers ParquetAllocator* allocator_; http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/parquet-io-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/parquet-io-test.cc b/cpp/src/arrow/parquet/parquet-io-test.cc index 6615457..208b3e8 100644 --- a/cpp/src/arrow/parquet/parquet-io-test.cc +++ b/cpp/src/arrow/parquet/parquet-io-test.cc @@ -22,6 +22,7 @@ #include "gtest/gtest.h" +#include "arrow/io/memory.h" #include "arrow/parquet/io.h" #include "arrow/test-util.h" #include "arrow/util/memory-pool.h" @@ -96,61 +97,13 @@ TEST(TestParquetAllocator, CustomPool) { // ---------------------------------------------------------------------- // Read source tests -class BufferReader : public io::RandomAccessFile { - public: - BufferReader(const uint8_t* buffer, int buffer_size) - : buffer_(buffer), buffer_size_(buffer_size), position_(0) {} - - Status Close() override { - // no-op - return Status::OK(); - } - - Status Tell(int64_t* position) override { - *position = position_; - return Status::OK(); - } - - Status ReadAt( - int64_t position, int64_t nbytes, int64_t* bytes_read, uint8_t* buffer) override { - RETURN_NOT_OK(Seek(position)); - return Read(nbytes, bytes_read, buffer); - } - - Status Read(int64_t nbytes, int64_t* bytes_read, uint8_t* buffer) override { - memcpy(buffer, buffer_ + position_, nbytes); - *bytes_read = std::min(nbytes, buffer_size_ - position_); - position_ += *bytes_read; - return Status::OK(); - } - - Status GetSize(int64_t* size) override { - *size = buffer_size_; - return Status::OK(); - } - - Status Seek(int64_t position) override { - if (position < 0 || position >= buffer_size_) { - return Status::IOError("position out of bounds"); - } - - position_ = position; - return Status::OK(); - } - - private: - const uint8_t* buffer_; - int buffer_size_; - int64_t position_; -}; - TEST(TestParquetReadSource, Basics) { std::string data = "this is the data"; auto data_buffer = reinterpret_cast(data.c_str()); ParquetAllocator allocator(default_memory_pool()); - auto file = std::make_shared(data_buffer, data.size()); + auto file = std::make_shared(data_buffer, data.size()); auto source = std::make_shared(&allocator); ASSERT_OK(source->Open(file)); http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/parquet-schema-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc index a2bcd3e..63ad8fb 100644 --- a/cpp/src/arrow/parquet/parquet-schema-test.cc +++ b/cpp/src/arrow/parquet/parquet-schema-test.cc @@ -178,8 +178,7 @@ class TestConvertArrowSchema : public ::testing::Test { NodePtr schema_node = GroupNode::Make("schema", Repetition::REPEATED, nodes); const GroupNode* expected_schema_node = static_cast(schema_node.get()); - const GroupNode* result_schema_node = - static_cast(result_schema_->schema().get()); + const GroupNode* result_schema_node = result_schema_->group_node(); ASSERT_EQ(expected_schema_node->field_count(), result_schema_node->field_count()); http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/reader.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/reader.cc b/cpp/src/arrow/parquet/reader.cc index 440ec84..0c2fc6e 100644 --- a/cpp/src/arrow/parquet/reader.cc +++ b/cpp/src/arrow/parquet/reader.cc @@ -149,7 +149,7 @@ bool FileReader::Impl::CheckForFlatColumn(const ::parquet::ColumnDescriptor* des } Status FileReader::Impl::GetFlatColumn(int i, std::unique_ptr* out) { - const ::parquet::SchemaDescriptor* schema = reader_->metadata()->schema_descriptor(); + const ::parquet::SchemaDescriptor* schema = reader_->metadata()->schema(); if (!CheckForFlatColumn(schema->Column(i))) { return Status::Invalid("The requested column is not flat"); @@ -167,9 +167,9 @@ Status FileReader::Impl::ReadFlatColumn(int i, std::shared_ptr* out) { } Status FileReader::Impl::ReadFlatTable(std::shared_ptr* table) { - auto descr = reader_->metadata()->schema_descriptor(); + auto descr = reader_->metadata()->schema(); - const std::string& name = descr->schema()->name(); + const std::string& name = descr->name(); std::shared_ptr schema; RETURN_NOT_OK(FromParquetSchema(descr, &schema)); @@ -193,7 +193,7 @@ FileReader::FileReader( FileReader::~FileReader() {} // Static ctor -Status OpenFile(const std::shared_ptr& file, +Status OpenFile(const std::shared_ptr& file, ParquetAllocator* allocator, std::unique_ptr* reader) { std::unique_ptr source(new ParquetReadSource(allocator)); RETURN_NOT_OK(source->Open(file)); http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/reader.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/reader.h b/cpp/src/arrow/parquet/reader.h index f1492f6..a9c64ec 100644 --- a/cpp/src/arrow/parquet/reader.h +++ b/cpp/src/arrow/parquet/reader.h @@ -137,7 +137,7 @@ class ARROW_EXPORT FlatColumnReader { // Helper function to create a file reader from an implementation of an Arrow // readable file ARROW_EXPORT -Status OpenFile(const std::shared_ptr& file, +Status OpenFile(const std::shared_ptr& file, ParquetAllocator* allocator, std::unique_ptr* reader); } // namespace parquet http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/schema.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc index cd91df3..ff32e51 100644 --- a/cpp/src/arrow/parquet/schema.cc +++ b/cpp/src/arrow/parquet/schema.cc @@ -202,7 +202,7 @@ Status FromParquetSchema( // TODO(wesm): Consider adding an arrow::Schema name attribute, which comes // from the root Parquet node const GroupNode* schema_node = - static_cast(parquet_schema->schema().get()); + static_cast(parquet_schema->group_node()); std::vector> fields(schema_node->field_count()); for (int i = 0; i < schema_node->field_count(); i++) { http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/parquet/writer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/parquet/writer.cc b/cpp/src/arrow/parquet/writer.cc index ddee573..2b47f14 100644 --- a/cpp/src/arrow/parquet/writer.cc +++ b/cpp/src/arrow/parquet/writer.cc @@ -334,7 +334,7 @@ Status WriteFlatTable(const Table* table, MemoryPool* pool, std::shared_ptr<::parquet::SchemaDescriptor> parquet_schema; RETURN_NOT_OK( ToParquetSchema(table->schema().get(), *properties.get(), &parquet_schema)); - auto schema_node = std::static_pointer_cast(parquet_schema->schema()); + auto schema_node = std::static_pointer_cast(parquet_schema->schema_root()); std::unique_ptr parquet_writer = ParquetFileWriter::Open(sink, schema_node, properties); FileWriter writer(pool, std::move(parquet_writer)); http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 02677d5..b4c3721 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -149,7 +149,7 @@ struct ARROW_EXPORT Field { int64_t dictionary; Field(const std::string& name, const TypePtr& type, bool nullable = true, - int64_t dictionary = 0) + int64_t dictionary = 0) : name(name), type(type), nullable(nullable), dictionary(dictionary) {} bool operator==(const Field& other) const { return this->Equals(other); } @@ -159,7 +159,7 @@ struct ARROW_EXPORT Field { bool Equals(const Field& other) const { return (this == &other) || (this->name == other.name && this->nullable == other.nullable && - this->dictionary == dictionary && this->type->Equals(other.type.get())); + this->dictionary == dictionary && this->type->Equals(other.type.get())); } bool Equals(const std::shared_ptr& other) const { return Equals(*other.get()); } http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/cpp/src/arrow/util/memory-pool-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc index e767e95..5d60376 100644 --- a/cpp/src/arrow/util/memory-pool-test.cc +++ b/cpp/src/arrow/util/memory-pool-test.cc @@ -64,6 +64,6 @@ TEST(DefaultMemoryPoolDeathTest, FreeLargeMemory) { pool->Free(data, 100); } -#endif // ARROW_VALGRIND +#endif // ARROW_VALGRIND } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/python/pyarrow/includes/libarrow_io.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow_io.pxd b/python/pyarrow/includes/libarrow_io.pxd index 734ace6..f338a43 100644 --- a/python/pyarrow/includes/libarrow_io.pxd +++ b/python/pyarrow/includes/libarrow_io.pxd @@ -29,25 +29,41 @@ cdef extern from "arrow/io/interfaces.h" namespace "arrow::io" nogil: ObjectType_FILE" arrow::io::ObjectType::FILE" ObjectType_DIRECTORY" arrow::io::ObjectType::DIRECTORY" - cdef cppclass FileBase: + cdef cppclass FileInterface: CStatus Close() CStatus Tell(int64_t* position) + FileMode mode() - cdef cppclass ReadableFile(FileBase): + cdef cppclass Readable: + CStatus Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) + + cdef cppclass Seekable: + CStatus Seek(int64_t position) + + cdef cppclass Writeable: + CStatus Write(const uint8_t* data, int64_t nbytes) + + cdef cppclass OutputStream(FileInterface, Writeable): + pass + + cdef cppclass InputStream(FileInterface, Readable): + pass + + cdef cppclass ReadableFileInterface(InputStream, Seekable): CStatus GetSize(int64_t* size) - CStatus Read(int64_t nbytes, int64_t* bytes_read, - uint8_t* buffer) CStatus ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, uint8_t* buffer) + CStatus ReadAt(int64_t position, int64_t nbytes, + int64_t* bytes_read, shared_ptr[Buffer]* out) - cdef cppclass RandomAccessFile(ReadableFile): - CStatus Seek(int64_t position) + cdef cppclass WriteableFileInterface(OutputStream, Seekable): + CStatus WriteAt(int64_t position, const uint8_t* data, + int64_t nbytes) - cdef cppclass WriteableFile(FileBase): - CStatus Write(const uint8_t* buffer, int64_t nbytes) - # CStatus Write(const uint8_t* buffer, int64_t nbytes, - # int64_t* bytes_written) + cdef cppclass ReadWriteFileInterface(ReadableFileInterface, + WriteableFileInterface): + pass cdef extern from "arrow/io/hdfs.h" namespace "arrow::io" nogil: @@ -70,10 +86,10 @@ cdef extern from "arrow/io/hdfs.h" namespace "arrow::io" nogil: int64_t block_size int16_t permissions - cdef cppclass HdfsReadableFile(RandomAccessFile): + cdef cppclass HdfsReadableFile(ReadableFileInterface): pass - cdef cppclass HdfsWriteableFile(WriteableFile): + cdef cppclass HdfsOutputStream(OutputStream): pass cdef cppclass CHdfsClient" arrow::io::HdfsClient": @@ -103,4 +119,4 @@ cdef extern from "arrow/io/hdfs.h" namespace "arrow::io" nogil: CStatus OpenWriteable(const c_string& path, c_bool append, int32_t buffer_size, int16_t replication, int64_t default_block_size, - shared_ptr[HdfsWriteableFile]* handle) + shared_ptr[HdfsOutputStream]* handle) http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/python/pyarrow/includes/parquet.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/parquet.pxd b/python/pyarrow/includes/parquet.pxd index fe24f59..f932a93 100644 --- a/python/pyarrow/includes/parquet.pxd +++ b/python/pyarrow/includes/parquet.pxd @@ -19,7 +19,7 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport CSchema, CStatus, CTable, MemoryPool -from pyarrow.includes.libarrow_io cimport RandomAccessFile +from pyarrow.includes.libarrow_io cimport ReadableFileInterface cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil: @@ -78,10 +78,10 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: unique_ptr[ParquetFileReader] OpenFile(const c_string& path) cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: - cdef cppclass OutputStream: + cdef cppclass ParquetOutputStream" parquet::OutputStream": pass - cdef cppclass LocalFileOutputStream(OutputStream): + cdef cppclass LocalFileOutputStream(ParquetOutputStream): LocalFileOutputStream(const c_string& path) void Close() @@ -100,11 +100,11 @@ cdef extern from "arrow/parquet/io.h" namespace "arrow::parquet" nogil: cdef cppclass ParquetReadSource: ParquetReadSource(ParquetAllocator* allocator) - Open(const shared_ptr[RandomAccessFile]& file) + Open(const shared_ptr[ReadableFileInterface]& file) cdef extern from "arrow/parquet/reader.h" namespace "arrow::parquet" nogil: - CStatus OpenFile(const shared_ptr[RandomAccessFile]& file, + CStatus OpenFile(const shared_ptr[ReadableFileInterface]& file, ParquetAllocator* allocator, unique_ptr[FileReader]* reader) @@ -121,6 +121,8 @@ cdef extern from "arrow/parquet/schema.h" namespace "arrow::parquet" nogil: cdef extern from "arrow/parquet/writer.h" namespace "arrow::parquet" nogil: - cdef CStatus WriteFlatTable(const CTable* table, MemoryPool* pool, - const shared_ptr[OutputStream]& sink, int64_t chunk_size, - const shared_ptr[WriterProperties]& properties) + cdef CStatus WriteFlatTable( + const CTable* table, MemoryPool* pool, + const shared_ptr[ParquetOutputStream]& sink, + int64_t chunk_size, + const shared_ptr[WriterProperties]& properties) http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/python/pyarrow/io.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/io.pxd b/python/pyarrow/io.pxd index b92af72..f55fc0a 100644 --- a/python/pyarrow/io.pxd +++ b/python/pyarrow/io.pxd @@ -19,7 +19,8 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * -from pyarrow.includes.libarrow_io cimport RandomAccessFile, WriteableFile +from pyarrow.includes.libarrow_io cimport (ReadableFileInterface, + OutputStream) cdef class NativeFileInterface: @@ -28,5 +29,5 @@ cdef class NativeFileInterface: # extension classes are technically virtual in the C++ sense)m we can # expose the arrow::io abstract file interfaces to other components # throughout the suite of Arrow C++ libraries - cdef read_handle(self, shared_ptr[RandomAccessFile]* file) - cdef write_handle(self, shared_ptr[WriteableFile]* file) + cdef read_handle(self, shared_ptr[ReadableFileInterface]* file) + cdef write_handle(self, shared_ptr[OutputStream]* file) http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/python/pyarrow/io.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx index b8bf883..f2eee26 100644 --- a/python/pyarrow/io.pyx +++ b/python/pyarrow/io.pyx @@ -316,16 +316,16 @@ cdef class HdfsClient: cdef class NativeFileInterface: - cdef read_handle(self, shared_ptr[RandomAccessFile]* file): + cdef read_handle(self, shared_ptr[ReadableFileInterface]* file): raise NotImplementedError - cdef write_handle(self, shared_ptr[WriteableFile]* file): + cdef write_handle(self, shared_ptr[OutputStream]* file): raise NotImplementedError cdef class HdfsFile(NativeFileInterface): cdef: shared_ptr[HdfsReadableFile] rd_file - shared_ptr[HdfsWriteableFile] wr_file + shared_ptr[HdfsOutputStream] wr_file bint is_readonly bint is_open object parent @@ -364,13 +364,13 @@ cdef class HdfsFile(NativeFileInterface): if self.is_readonly: raise IOError("only valid on writeonly files") - cdef read_handle(self, shared_ptr[RandomAccessFile]* file): + cdef read_handle(self, shared_ptr[ReadableFileInterface]* file): self._assert_readable() - file[0] = self.rd_file + file[0] = self.rd_file - cdef write_handle(self, shared_ptr[WriteableFile]* file): + cdef write_handle(self, shared_ptr[OutputStream]* file): self._assert_writeable() - file[0] = self.wr_file + file[0] = self.wr_file def size(self): cdef int64_t size http://git-wip-us.apache.org/repos/asf/arrow/blob/559b8652/python/pyarrow/parquet.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx index ebba1a1..fb36b29 100644 --- a/python/pyarrow/parquet.pyx +++ b/python/pyarrow/parquet.pyx @@ -21,7 +21,7 @@ from pyarrow.includes.libarrow cimport * from pyarrow.includes.parquet cimport * -from pyarrow.includes.libarrow_io cimport RandomAccessFile, WriteableFile +from pyarrow.includes.libarrow_io cimport ReadableFileInterface cimport pyarrow.includes.pyarrow as pyarrow from pyarrow.compat import tobytes @@ -55,7 +55,7 @@ cdef class ParquetReader: ParquetFileReader.OpenFile(path))) cdef open_native_file(self, NativeFileInterface file): - cdef shared_ptr[RandomAccessFile] cpp_handle + cdef shared_ptr[ReadableFileInterface] cpp_handle file.read_handle(&cpp_handle) check_cstatus(OpenFile(cpp_handle, &self.allocator, &self.reader)) @@ -105,7 +105,7 @@ def write_table(table, filename, chunk_size=None, version=None): """ cdef Table table_ = table cdef CTable* ctable_ = table_.table - cdef shared_ptr[OutputStream] sink + cdef shared_ptr[ParquetOutputStream] sink cdef WriterProperties.Builder properties_builder cdef int64_t chunk_size_ = 0 if chunk_size is None: