Return-Path: X-Original-To: apmail-arrow-commits-archive@minotaur.apache.org Delivered-To: apmail-arrow-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id F3A8B19013 for ; Wed, 23 Mar 2016 01:45:22 +0000 (UTC) Received: (qmail 28222 invoked by uid 500); 23 Mar 2016 01:45:22 -0000 Delivered-To: apmail-arrow-commits-archive@arrow.apache.org Received: (qmail 28204 invoked by uid 500); 23 Mar 2016 01:45:22 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 28195 invoked by uid 99); 23 Mar 2016 01:45:22 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 23 Mar 2016 01:45:22 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B5C72E0044; Wed, 23 Mar 2016 01:45:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Date: Wed, 23 Mar 2016 01:45:22 -0000 Message-Id: <69ef78b99a4b41d680d9e10436bc92e7@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps Repository: arrow Updated Branches: refs/heads/master 093f9bd8c -> 65db0da80 http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/integer.cc b/cpp/src/arrow/types/integer.cc deleted file mode 100644 index 4696536..0000000 --- a/cpp/src/arrow/types/integer.cc +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/integer.h" - -namespace arrow { - -} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/integer.h b/cpp/src/arrow/types/integer.h deleted file mode 100644 index 5684191..0000000 --- a/cpp/src/arrow/types/integer.h +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_INTEGER_H -#define ARROW_TYPES_INTEGER_H - -#include -#include - -#include "arrow/types/primitive.h" -#include "arrow/type.h" - -namespace arrow { - -// Array containers - -typedef PrimitiveArrayImpl UInt8Array; -typedef PrimitiveArrayImpl Int8Array; - -typedef PrimitiveArrayImpl UInt16Array; -typedef PrimitiveArrayImpl Int16Array; - -typedef PrimitiveArrayImpl UInt32Array; -typedef PrimitiveArrayImpl Int32Array; - -typedef PrimitiveArrayImpl UInt64Array; -typedef PrimitiveArrayImpl Int64Array; - -// Builders - -typedef PrimitiveBuilder UInt8Builder; -typedef PrimitiveBuilder UInt16Builder; -typedef PrimitiveBuilder UInt32Builder; -typedef PrimitiveBuilder UInt64Builder; - -typedef PrimitiveBuilder Int8Builder; -typedef PrimitiveBuilder Int16Builder; -typedef PrimitiveBuilder Int32Builder; -typedef PrimitiveBuilder Int64Builder; - -} // namespace arrow - -#endif // ARROW_TYPES_INTEGER_H http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc index 168e370..fb731ed 100644 --- a/cpp/src/arrow/types/json.cc +++ b/cpp/src/arrow/types/json.cc @@ -20,7 +20,6 @@ #include #include "arrow/type.h" -#include "arrow/types/string.h" #include "arrow/types/union.h" namespace arrow { http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h index b67fb38..9c850af 100644 --- a/cpp/src/arrow/types/json.h +++ b/cpp/src/arrow/types/json.h @@ -28,8 +28,8 @@ struct JSONScalar : public DataType { static TypePtr dense_type; static TypePtr sparse_type; - explicit JSONScalar(bool dense = true, bool nullable = true) - : DataType(LogicalType::JSON_SCALAR, nullable), + explicit JSONScalar(bool dense = true) + : DataType(Type::JSON_SCALAR), dense(dense) {} }; http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 02991de..eb55ca8 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -15,20 +15,21 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include #include #include +#include "gtest/gtest.h" + #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/types/construct.h" -#include "arrow/types/integer.h" #include "arrow/types/list.h" -#include "arrow/types/string.h" +#include "arrow/types/primitive.h" #include "arrow/types/test-common.h" #include "arrow/util/status.h" @@ -39,27 +40,24 @@ using std::vector; namespace arrow { -class ArrayBuilder; - TEST(TypesTest, TestListType) { std::shared_ptr vt = std::make_shared(); ListType list_type(vt); - ASSERT_EQ(list_type.type, LogicalType::LIST); + ASSERT_EQ(list_type.type, Type::LIST); ASSERT_EQ(list_type.name(), string("list")); - ASSERT_EQ(list_type.ToString(), string("list")); + ASSERT_EQ(list_type.ToString(), string("list")); - ASSERT_EQ(list_type.value_type->type, vt->type); - ASSERT_EQ(list_type.value_type->type, vt->type); + ASSERT_EQ(list_type.value_type()->type, vt->type); + ASSERT_EQ(list_type.value_type()->type, vt->type); - std::shared_ptr st = std::make_shared(false); - std::shared_ptr lt = std::make_shared(st, false); - ASSERT_EQ(lt->ToString(), string("list not null")); + std::shared_ptr st = std::make_shared(); + std::shared_ptr lt = std::make_shared(st); + ASSERT_EQ(lt->ToString(), string("list")); - ListType lt2(lt, false); - ASSERT_EQ(lt2.ToString(), - string("list not null> not null")); + ListType lt2(lt); + ASSERT_EQ(lt2.ToString(), string("list>")); } // ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 69a79a7..670ee4d 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -19,4 +19,33 @@ namespace arrow { +bool ListArray::EqualsExact(const ListArray& other) const { + if (this == &other) return true; + if (null_count_ != other.null_count_) { + return false; + } + + bool equal_offsets = offset_buf_->Equals(*other.offset_buf_, + length_ + 1); + bool equal_nulls = true; + if (null_count_ > 0) { + equal_nulls = nulls_->Equals(*other.nulls_, + util::bytes_for_bits(length_)); + } + + if (!(equal_offsets && equal_nulls)) { + return false; + } + + return values()->Equals(other.values()); +} + +bool ListArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) return true; + if (this->type_enum() != arr->type_enum()) { + return false; + } + return EqualsExact(*static_cast(arr.get())); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 210c76a..141f762 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -21,12 +21,10 @@ #include #include #include -#include #include "arrow/array.h" #include "arrow/builder.h" #include "arrow/type.h" -#include "arrow/types/integer.h" #include "arrow/types/primitive.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" @@ -38,29 +36,19 @@ class MemoryPool; class ListArray : public Array { public: - ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {} - ListArray(const TypePtr& type, int32_t length, std::shared_ptr offsets, const ArrayPtr& values, int32_t null_count = 0, - std::shared_ptr nulls = nullptr) { - Init(type, length, offsets, values, null_count, nulls); - } - - virtual ~ListArray() {} - - void Init(const TypePtr& type, int32_t length, std::shared_ptr offsets, - const ArrayPtr& values, - int32_t null_count = 0, - std::shared_ptr nulls = nullptr) { + std::shared_ptr nulls = nullptr) : + Array(type, length, null_count, nulls) { offset_buf_ = offsets; offsets_ = offsets == nullptr? nullptr : reinterpret_cast(offset_buf_->data()); - values_ = values; - Array::Init(type, length, null_count, nulls); } + virtual ~ListArray() {} + // Return a shared pointer in case the requestor desires to share ownership // with this array. const std::shared_ptr& values() const {return values_;} @@ -77,6 +65,9 @@ class ListArray : public Array { int32_t value_offset(int i) { return offsets_[i];} int32_t value_length(int i) { return offsets_[i + 1] - offsets_[i];} + bool EqualsExact(const ListArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + protected: std::shared_ptr offset_buf_; const int32_t* offsets_; @@ -137,8 +128,6 @@ class ListBuilder : public Int32Builder { template std::shared_ptr Transfer() { - auto result = std::make_shared(); - std::shared_ptr items = value_builder_->Finish(); // Add final offset if the length is non-zero @@ -146,8 +135,9 @@ class ListBuilder : public Int32Builder { raw_buffer()[length_] = items->length(); } - result->Init(type_, length_, values_, items, + auto result = std::make_shared(type_, length_, values_, items, null_count_, nulls_); + values_ = nulls_ = nullptr; capacity_ = length_ = null_count_ = 0; http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index f35a258..7eae8cd 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -15,21 +15,17 @@ // specific language governing permissions and limitations // under the License. -#include - #include #include #include #include -#include "arrow/array.h" +#include "gtest/gtest.h" + #include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/boolean.h" #include "arrow/types/construct.h" -#include "arrow/types/floating.h" -#include "arrow/types/integer.h" #include "arrow/types/primitive.h" #include "arrow/types/test-common.h" #include "arrow/util/bit-util.h" @@ -43,23 +39,17 @@ using std::vector; namespace arrow { -TEST(TypesTest, TestBytesType) { - BytesType t1(3); - - ASSERT_EQ(t1.type, LayoutEnum::BYTE); - ASSERT_EQ(t1.size, 3); -} - +class Array; #define PRIMITIVE_TEST(KLASS, ENUM, NAME) \ TEST(TypesTest, TestPrimitive_##ENUM) { \ KLASS tp; \ \ - ASSERT_EQ(tp.type, LogicalType::ENUM); \ + ASSERT_EQ(tp.type, Type::ENUM); \ ASSERT_EQ(tp.name(), string(NAME)); \ \ KLASS tp_copy = tp; \ - ASSERT_EQ(tp_copy.type, LogicalType::ENUM); \ + ASSERT_EQ(tp_copy.type, Type::ENUM); \ } PRIMITIVE_TEST(Int8Type, INT8, "int8"); @@ -109,22 +99,20 @@ class TestPrimitiveBuilder : public TestBuilder { void RandomData(int N, double pct_null = 0.1) { Attrs::draw(N, &draws_); - random_nulls(N, pct_null, &nulls_); + test::random_nulls(N, pct_null, &nulls_); } void CheckNullable() { - ArrayType expected; int size = builder_->length(); auto ex_data = std::make_shared( reinterpret_cast(draws_.data()), size * sizeof(T)); - auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size); - - int32_t ex_null_count = null_count(nulls_); + auto ex_nulls = test::bytes_to_null_buffer(nulls_.data(), size); + int32_t ex_null_count = test::null_count(nulls_); - expected.Init(size, ex_data, ex_null_count, ex_nulls); + auto expected = std::make_shared(size, ex_data, ex_null_count, ex_nulls); std::shared_ptr result = std::dynamic_pointer_cast( builder_->Finish()); @@ -135,18 +123,17 @@ class TestPrimitiveBuilder : public TestBuilder { ASSERT_EQ(0, builder_->null_count()); ASSERT_EQ(nullptr, builder_->buffer()); - ASSERT_TRUE(result->Equals(expected)); + ASSERT_TRUE(result->EqualsExact(*expected.get())); ASSERT_EQ(ex_null_count, result->null_count()); } void CheckNonNullable() { - ArrayType expected; int size = builder_nn_->length(); auto ex_data = std::make_shared(reinterpret_cast(draws_.data()), size * sizeof(T)); - expected.Init(size, ex_data); + auto expected = std::make_shared(size, ex_data); std::shared_ptr result = std::dynamic_pointer_cast( builder_nn_->Finish()); @@ -156,7 +143,7 @@ class TestPrimitiveBuilder : public TestBuilder { ASSERT_EQ(0, builder_nn_->capacity()); ASSERT_EQ(nullptr, builder_nn_->buffer()); - ASSERT_TRUE(result->Equals(expected)); + ASSERT_TRUE(result->EqualsExact(*expected.get())); ASSERT_EQ(0, result->null_count()); } @@ -183,8 +170,8 @@ class TestPrimitiveBuilder : public TestBuilder { #define PINT_DECL(CapType, c_type, LOWER, UPPER) \ struct P##CapType { \ PTYPE_DECL(CapType, c_type); \ - static void draw(int N, vector* draws) { \ - randint(N, LOWER, UPPER, draws); \ + static void draw(int N, vector* draws) { \ + test::randint(N, LOWER, UPPER, draws); \ } \ } http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index c86260b..32b8bfa 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -26,16 +26,16 @@ namespace arrow { // ---------------------------------------------------------------------- // Primitive array base -void PrimitiveArray::Init(const TypePtr& type, int32_t length, +PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& nulls) { - Array::Init(type, length, null_count, nulls); + const std::shared_ptr& nulls) : + Array(type, length, null_count, nulls) { data_ = data; raw_data_ = data == nullptr? nullptr : data_->data(); } -bool PrimitiveArray::Equals(const PrimitiveArray& other) const { +bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { if (this == &other) return true; if (null_count_ != other.null_count_) { return false; @@ -50,4 +50,12 @@ bool PrimitiveArray::Equals(const PrimitiveArray& other) const { } } +bool PrimitiveArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) return true; + if (this->type_enum() != arr->type_enum()) { + return false; + } + return EqualsExact(*static_cast(arr.get())); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 22ab59c..e01027c 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -21,7 +21,6 @@ #include #include #include -#include #include "arrow/array.h" #include "arrow/builder.h" @@ -38,64 +37,57 @@ class MemoryPool; // Base class for fixed-size logical types class PrimitiveArray : public Array { public: - PrimitiveArray() : Array(), data_(nullptr), raw_data_(nullptr) {} - - virtual ~PrimitiveArray() {} - - void Init(const TypePtr& type, int32_t length, + PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, int32_t null_count = 0, const std::shared_ptr& nulls = nullptr); + virtual ~PrimitiveArray() {} const std::shared_ptr& data() const { return data_;} - bool Equals(const PrimitiveArray& other) const; + bool EqualsExact(const PrimitiveArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; protected: std::shared_ptr data_; const uint8_t* raw_data_; }; - -template -class PrimitiveArrayImpl : public PrimitiveArray { - public: - typedef typename TypeClass::c_type value_type; - - PrimitiveArrayImpl() : PrimitiveArray() {} - - virtual ~PrimitiveArrayImpl() {} - - PrimitiveArrayImpl(int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, - const std::shared_ptr& nulls = nullptr) { - Init(length, data, null_count, nulls); - } - - void Init(int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, - const std::shared_ptr& nulls = nullptr) { - TypePtr type(new TypeClass()); - PrimitiveArray::Init(type, length, data, null_count, nulls); - } - - bool Equals(const PrimitiveArrayImpl& other) const { - return PrimitiveArray::Equals(*static_cast(&other)); - } - - const value_type* raw_data() const { - return reinterpret_cast(raw_data_); - } - - value_type Value(int i) const { - return raw_data()[i]; - } - - TypeClass* exact_type() const { - return static_cast(type_); - } +#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T) \ +class NAME : public PrimitiveArray { \ + public: \ + using value_type = T; \ + using PrimitiveArray::PrimitiveArray; \ + NAME(int32_t length, const std::shared_ptr& data, \ + int32_t null_count = 0, \ + const std::shared_ptr& nulls = nullptr) : \ + PrimitiveArray(std::make_shared(), length, data, \ + null_count, nulls) {} \ + \ + bool EqualsExact(const NAME& other) const { \ + return PrimitiveArray::EqualsExact( \ + *static_cast(&other)); \ + } \ + \ + const T* raw_data() const { \ + return reinterpret_cast(raw_data_); \ + } \ + \ + T Value(int i) const { \ + return raw_data()[i]; \ + } \ }; +NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type, uint8_t); +NUMERIC_ARRAY_DECL(Int8Array, Int8Type, int8_t); +NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type, uint16_t); +NUMERIC_ARRAY_DECL(Int16Array, Int16Type, int16_t); +NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type, uint32_t); +NUMERIC_ARRAY_DECL(Int32Array, Int32Type, int32_t); +NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type, uint64_t); +NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t); +NUMERIC_ARRAY_DECL(FloatArray, FloatType, float); +NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double); template class PrimitiveBuilder : public ArrayBuilder { @@ -202,8 +194,9 @@ class PrimitiveBuilder : public ArrayBuilder { } std::shared_ptr Finish() override { - std::shared_ptr result = std::make_shared(); - result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_); + std::shared_ptr result = std::make_shared( + type_, length_, values_, null_count_, nulls_); + values_ = nulls_ = nullptr; capacity_ = length_ = null_count_ = 0; return result; @@ -222,6 +215,21 @@ class PrimitiveBuilder : public ArrayBuilder { int elsize_; }; +// Builders + +typedef PrimitiveBuilder UInt8Builder; +typedef PrimitiveBuilder UInt16Builder; +typedef PrimitiveBuilder UInt32Builder; +typedef PrimitiveBuilder UInt64Builder; + +typedef PrimitiveBuilder Int8Builder; +typedef PrimitiveBuilder Int16Builder; +typedef PrimitiveBuilder Int32Builder; +typedef PrimitiveBuilder Int64Builder; + +typedef PrimitiveBuilder FloatBuilder; +typedef PrimitiveBuilder DoubleBuilder; + } // namespace arrow #endif // ARROW_TYPES_PRIMITIVE_H http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc index 6381093..7dc3d68 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/types/string-test.cc @@ -15,21 +15,20 @@ // specific language governing permissions and limitations // under the License. -#include #include +#include #include #include #include +#include "gtest/gtest.h" + #include "arrow/array.h" -#include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/construct.h" -#include "arrow/types/integer.h" +#include "arrow/types/primitive.h" #include "arrow/types/string.h" #include "arrow/types/test-common.h" -#include "arrow/util/status.h" namespace arrow { @@ -38,14 +37,14 @@ class Buffer; TEST(TypesTest, TestCharType) { CharType t1(5); - ASSERT_EQ(t1.type, LogicalType::CHAR); + ASSERT_EQ(t1.type, Type::CHAR); ASSERT_EQ(t1.size, 5); ASSERT_EQ(t1.ToString(), std::string("char(5)")); // Test copy constructor CharType t2 = t1; - ASSERT_EQ(t2.type, LogicalType::CHAR); + ASSERT_EQ(t2.type, Type::CHAR); ASSERT_EQ(t2.size, 5); } @@ -53,22 +52,20 @@ TEST(TypesTest, TestCharType) { TEST(TypesTest, TestVarcharType) { VarcharType t1(5); - ASSERT_EQ(t1.type, LogicalType::VARCHAR); + ASSERT_EQ(t1.type, Type::VARCHAR); ASSERT_EQ(t1.size, 5); - ASSERT_EQ(t1.physical_type.size, 6); ASSERT_EQ(t1.ToString(), std::string("varchar(5)")); // Test copy constructor VarcharType t2 = t1; - ASSERT_EQ(t2.type, LogicalType::VARCHAR); + ASSERT_EQ(t2.type, Type::VARCHAR); ASSERT_EQ(t2.size, 5); - ASSERT_EQ(t2.physical_type.size, 6); } TEST(TypesTest, TestStringType) { StringType str; - ASSERT_EQ(str.type, LogicalType::STRING); + ASSERT_EQ(str.type, Type::STRING); ASSERT_EQ(str.name(), std::string("string")); } @@ -90,15 +87,16 @@ class TestStringContainer : public ::testing::Test { length_ = offsets_.size() - 1; int nchars = chars_.size(); - value_buf_ = to_buffer(chars_); + value_buf_ = test::to_buffer(chars_); values_ = ArrayPtr(new UInt8Array(nchars, value_buf_)); - offsets_buf_ = to_buffer(offsets_); + offsets_buf_ = test::to_buffer(offsets_); - nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size()); - null_count_ = null_count(nulls_); + nulls_buf_ = test::bytes_to_null_buffer(nulls_.data(), nulls_.size()); + null_count_ = test::null_count(nulls_); - strings_.Init(length_, offsets_buf_, values_, null_count_, nulls_buf_); + strings_ = std::make_shared(length_, offsets_buf_, values_, + null_count_, nulls_buf_); } protected: @@ -116,28 +114,28 @@ class TestStringContainer : public ::testing::Test { int length_; ArrayPtr values_; - StringArray strings_; + std::shared_ptr strings_; }; TEST_F(TestStringContainer, TestArrayBasics) { - ASSERT_EQ(length_, strings_.length()); - ASSERT_EQ(1, strings_.null_count()); + ASSERT_EQ(length_, strings_->length()); + ASSERT_EQ(1, strings_->null_count()); } TEST_F(TestStringContainer, TestType) { - TypePtr type = strings_.type(); + TypePtr type = strings_->type(); - ASSERT_EQ(LogicalType::STRING, type->type); - ASSERT_EQ(LogicalType::STRING, strings_.logical_type()); + ASSERT_EQ(Type::STRING, type->type); + ASSERT_EQ(Type::STRING, strings_->type_enum()); } TEST_F(TestStringContainer, TestListFunctions) { int pos = 0; for (size_t i = 0; i < expected_.size(); ++i) { - ASSERT_EQ(pos, strings_.value_offset(i)); - ASSERT_EQ(expected_[i].size(), strings_.value_length(i)); + ASSERT_EQ(pos, strings_->value_offset(i)); + ASSERT_EQ(expected_[i].size(), strings_->value_length(i)); pos += expected_[i].size(); } } @@ -151,9 +149,9 @@ TEST_F(TestStringContainer, TestDestructor) { TEST_F(TestStringContainer, TestGetString) { for (size_t i = 0; i < expected_.size(); ++i) { if (nulls_[i]) { - ASSERT_TRUE(strings_.IsNull(i)); + ASSERT_TRUE(strings_->IsNull(i)); } else { - ASSERT_EQ(expected_[i], strings_.GetString(i)); + ASSERT_EQ(expected_[i], strings_->GetString(i)); } } } @@ -199,7 +197,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) { Done(); ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps * null_count(is_null), result_->null_count()); + ASSERT_EQ(reps * test::null_count(is_null), result_->null_count()); ASSERT_EQ(reps * 6, result_->values()->length()); int32_t length; http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h index 8ccc0a9..2b3fba5 100644 --- a/cpp/src/arrow/types/string.h +++ b/cpp/src/arrow/types/string.h @@ -25,25 +25,21 @@ #include "arrow/array.h" #include "arrow/type.h" -#include "arrow/types/integer.h" #include "arrow/types/list.h" +#include "arrow/types/primitive.h" #include "arrow/util/status.h" namespace arrow { -class ArrayBuilder; class Buffer; class MemoryPool; struct CharType : public DataType { int size; - BytesType physical_type; - - explicit CharType(int size, bool nullable = true) - : DataType(LogicalType::CHAR, nullable), - size(size), - physical_type(BytesType(size)) {} + explicit CharType(int size) + : DataType(Type::CHAR), + size(size) {} CharType(const CharType& other) : CharType(other.size) {} @@ -56,54 +52,36 @@ struct CharType : public DataType { struct VarcharType : public DataType { int size; - BytesType physical_type; - - explicit VarcharType(int size, bool nullable = true) - : DataType(LogicalType::VARCHAR, nullable), - size(size), - physical_type(BytesType(size + 1)) {} + explicit VarcharType(int size) + : DataType(Type::VARCHAR), + size(size) {} VarcharType(const VarcharType& other) : VarcharType(other.size) {} virtual std::string ToString() const; }; -static const LayoutPtr byte1(new BytesType(1)); -static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1)); - // TODO: add a BinaryArray layer in between class StringArray : public ListArray { public: - StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {} - - StringArray(int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, - int32_t null_count = 0, - const std::shared_ptr& nulls = nullptr) { - Init(length, offsets, values, null_count, nulls); - } - - void Init(const TypePtr& type, int32_t length, + StringArray(const TypePtr& type, int32_t length, const std::shared_ptr& offsets, const ArrayPtr& values, int32_t null_count = 0, - const std::shared_ptr& nulls = nullptr) { - ListArray::Init(type, length, offsets, values, null_count, nulls); - - // TODO: type validation for values array - + const std::shared_ptr& nulls = nullptr) : + ListArray(type, length, offsets, values, null_count, nulls) { // For convenience bytes_ = static_cast(values.get()); raw_bytes_ = bytes_->raw_data(); } - void Init(int32_t length, const std::shared_ptr& offsets, + StringArray(int32_t length, + const std::shared_ptr& offsets, const ArrayPtr& values, int32_t null_count = 0, - const std::shared_ptr& nulls = nullptr) { - TypePtr type(new StringType()); - Init(type, length, offsets, values, null_count, nulls); - } + const std::shared_ptr& nulls = nullptr) : + StringArray(std::make_shared(), length, offsets, values, + null_count, nulls) {} // Compute the pointer t const uint8_t* GetValue(int i, int32_t* out_length) const { @@ -125,9 +103,6 @@ class StringArray : public ListArray { }; // Array builder - - - class StringBuilder : public ListBuilder { public: explicit StringBuilder(MemoryPool* pool, const TypePtr& type) : http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/struct-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc index 9a4777e..d94396f 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/types/struct-test.cc @@ -15,16 +15,13 @@ // specific language governing permissions and limitations // under the License. -#include - #include #include #include +#include "gtest/gtest.h" + #include "arrow/type.h" -#include "arrow/types/integer.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" using std::shared_ptr; using std::string; @@ -42,13 +39,13 @@ TEST(TestStructType, Basics) { TypePtr f2_type = TypePtr(new UInt8Type()); auto f2 = std::make_shared("f2", f2_type); - vector > fields = {f0, f1, f2}; + vector> fields = {f0, f1, f2}; StructType struct_type(fields); - ASSERT_TRUE(struct_type.field(0)->Equals(f0)); - ASSERT_TRUE(struct_type.field(1)->Equals(f1)); - ASSERT_TRUE(struct_type.field(2)->Equals(f2)); + ASSERT_TRUE(struct_type.child(0)->Equals(f0)); + ASSERT_TRUE(struct_type.child(1)->Equals(f1)); + ASSERT_TRUE(struct_type.child(2)->Equals(f2)); ASSERT_EQ(struct_type.ToString(), "struct"); http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h index 1744efc..227aca6 100644 --- a/cpp/src/arrow/types/test-common.h +++ b/cpp/src/arrow/types/test-common.h @@ -18,11 +18,12 @@ #ifndef ARROW_TYPES_TEST_COMMON_H #define ARROW_TYPES_TEST_COMMON_H -#include #include #include #include +#include "gtest/gtest.h" + #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/util/memory-pool.h" @@ -34,7 +35,7 @@ namespace arrow { class TestBuilder : public ::testing::Test { public: void SetUp() { - pool_ = GetDefaultMemoryPool(); + pool_ = default_memory_pool(); type_ = TypePtr(new UInt8Type()); builder_.reset(new UInt8Builder(pool_, type_)); builder_nn_.reset(new UInt8Builder(pool_, type_)); http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/union.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h index 9aff780..29cda90 100644 --- a/cpp/src/arrow/types/union.h +++ b/cpp/src/arrow/types/union.h @@ -30,8 +30,8 @@ namespace arrow { class Buffer; -struct DenseUnionType : public CollectionType { - typedef CollectionType Base; +struct DenseUnionType : public CollectionType { + typedef CollectionType Base; explicit DenseUnionType(const std::vector& child_types) : Base() { @@ -42,8 +42,8 @@ struct DenseUnionType : public CollectionType { }; -struct SparseUnionType : public CollectionType { - typedef CollectionType Base; +struct SparseUnionType : public CollectionType { + typedef CollectionType Base; explicit SparseUnionType(const std::vector& child_types) : Base() { @@ -55,28 +55,20 @@ struct SparseUnionType : public CollectionType { class UnionArray : public Array { - public: - UnionArray() : Array() {} - protected: // The data are types encoded as int16 Buffer* types_; - std::vector > children_; + std::vector> children_; }; class DenseUnionArray : public UnionArray { - public: - DenseUnionArray() : UnionArray() {} - protected: Buffer* offset_buf_; }; class SparseUnionArray : public UnionArray { - public: - SparseUnionArray() : UnionArray() {} }; } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index 7506ca5..220bff0 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ b/cpp/src/arrow/util/bit-util-test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/util/bit-util.h" +#include "gtest/gtest.h" + namespace arrow { TEST(UtilTests, TestNextPower2) { http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 5e7197f..1d2d1d5 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -19,7 +19,6 @@ #define ARROW_UTIL_BIT_UTIL_H #include -#include #include namespace arrow { http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc index 9f1fd91..1d58226 100644 --- a/cpp/src/arrow/util/buffer-test.cc +++ b/cpp/src/arrow/util/buffer-test.cc @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include +#include "gtest/gtest.h" + #include "arrow/test-util.h" #include "arrow/util/buffer.h" #include "arrow/util/status.h" http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc index 50f4716..04cdcd7 100644 --- a/cpp/src/arrow/util/buffer.cc +++ b/cpp/src/arrow/util/buffer.cc @@ -40,7 +40,7 @@ std::shared_ptr MutableBuffer::GetImmutableView() { PoolBuffer::PoolBuffer(MemoryPool* pool) : ResizableBuffer(nullptr, 0) { if (pool == nullptr) { - pool = GetDefaultMemoryPool(); + pool = default_memory_pool(); } pool_ = pool; } http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc index 954b5f9..6ef07a0 100644 --- a/cpp/src/arrow/util/memory-pool-test.cc +++ b/cpp/src/arrow/util/memory-pool-test.cc @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include #include #include +#include "gtest/gtest.h" + #include "arrow/test-util.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" @@ -26,7 +27,7 @@ namespace arrow { TEST(DefaultMemoryPool, MemoryTracking) { - MemoryPool* pool = GetDefaultMemoryPool(); + MemoryPool* pool = default_memory_pool(); uint8_t* data; ASSERT_OK(pool->Allocate(100, &data)); @@ -37,7 +38,7 @@ TEST(DefaultMemoryPool, MemoryTracking) { } TEST(DefaultMemoryPool, OOM) { - MemoryPool* pool = GetDefaultMemoryPool(); + MemoryPool* pool = default_memory_pool(); uint8_t* data; int64_t to_alloc = std::numeric_limits::max(); http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc index 5820346..0b885e9 100644 --- a/cpp/src/arrow/util/memory-pool.cc +++ b/cpp/src/arrow/util/memory-pool.cc @@ -70,9 +70,9 @@ void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) { InternalMemoryPool::~InternalMemoryPool() {} -MemoryPool* GetDefaultMemoryPool() { - static InternalMemoryPool default_memory_pool; - return &default_memory_pool; +MemoryPool* default_memory_pool() { + static InternalMemoryPool default_memory_pool_; + return &default_memory_pool_; } } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h index a7cb10d..0d24786 100644 --- a/cpp/src/arrow/util/memory-pool.h +++ b/cpp/src/arrow/util/memory-pool.h @@ -34,7 +34,7 @@ class MemoryPool { virtual int64_t bytes_allocated() const = 0; }; -MemoryPool* GetDefaultMemoryPool(); +MemoryPool* default_memory_pool(); } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc index c6e113e..43cb87e 100644 --- a/cpp/src/arrow/util/status.cc +++ b/cpp/src/arrow/util/status.cc @@ -54,6 +54,9 @@ std::string Status::CodeAsString() const { case StatusCode::Invalid: type = "Invalid"; break; + case StatusCode::IOError: + type = "IOError"; + break; case StatusCode::NotImplemented: type = "NotImplemented"; break; http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h index 47fda40..b593123 100644 --- a/cpp/src/arrow/util/status.h +++ b/cpp/src/arrow/util/status.h @@ -63,6 +63,7 @@ enum class StatusCode: char { OutOfMemory = 1, KeyError = 2, Invalid = 3, + IOError = 4, NotImplemented = 10, }; @@ -97,12 +98,17 @@ class Status { return Status(StatusCode::Invalid, msg, -1); } + static Status IOError(const std::string& msg) { + return Status(StatusCode::IOError, msg, -1); + } + // Returns true iff the status indicates success. bool ok() const { return (state_ == NULL); } bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; } bool IsKeyError() const { return code() == StatusCode::KeyError; } bool IsInvalid() const { return code() == StatusCode::Invalid; } + bool IsIOError() const { return code() == StatusCode::IOError; } // Return a string representation of this status suitable for printing. // Returns the string "OK" for success. http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/test_main.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/test_main.cc b/cpp/src/arrow/util/test_main.cc index 00139f3..adc8466 100644 --- a/cpp/src/arrow/util/test_main.cc +++ b/cpp/src/arrow/util/test_main.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include +#include "gtest/gtest.h" int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/build_thirdparty.sh ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/build_thirdparty.sh b/cpp/thirdparty/build_thirdparty.sh index 294737c..3d5f532 100755 --- a/cpp/thirdparty/build_thirdparty.sh +++ b/cpp/thirdparty/build_thirdparty.sh @@ -17,6 +17,7 @@ else case $arg in "gtest") F_GTEST=1 ;; "gbenchmark") F_GBENCHMARK=1 ;; + "flatbuffers") F_FLATBUFFERS=1 ;; *) echo "Unknown module: $arg"; exit 1 ;; esac done @@ -78,6 +79,14 @@ if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; } fi +FLATBUFFERS_ERROR="failed for flatbuffers" +if [ -n "$F_ALL" -o -n "$F_FLATBUFFERS" ]; then + cd $TP_DIR/$FLATBUFFERS_BASEDIR + + CXXFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX -DFLATBUFFERS_BUILD_TESTS=OFF . || { echo "cmake $FLATBUFFERS_ERROR" ; exit 1; } + make -j$PARALLEL + make install +fi echo "---------------------" echo "Thirdparty dependencies built and installed into $PREFIX successfully" http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/download_thirdparty.sh ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/download_thirdparty.sh b/cpp/thirdparty/download_thirdparty.sh index d22c559..d299afc 100755 --- a/cpp/thirdparty/download_thirdparty.sh +++ b/cpp/thirdparty/download_thirdparty.sh @@ -25,3 +25,8 @@ if [ ! -d ${GBENCHMARK_BASEDIR} ]; then echo "Fetching google benchmark" download_extract_and_cleanup $GBENCHMARK_URL fi + +if [ ! -d ${FLATBUFFERS_BASEDIR} ]; then + echo "Fetching flatbuffers" + download_extract_and_cleanup $FLATBUFFERS_URL +fi http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/versions.sh ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/versions.sh b/cpp/thirdparty/versions.sh index 9cfc7cd..cb455b4 100755 --- a/cpp/thirdparty/versions.sh +++ b/cpp/thirdparty/versions.sh @@ -5,3 +5,7 @@ GTEST_BASEDIR=googletest-release-$GTEST_VERSION GBENCHMARK_VERSION=1.0.0 GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION + +FLATBUFFERS_VERSION=1.3.0 +FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz" +FLATBUFFERS_BASEDIR=flatbuffers-$FLATBUFFERS_VERSION http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/format/Message.fbs ---------------------------------------------------------------------- diff --git a/format/Message.fbs b/format/Message.fbs new file mode 100644 index 0000000..3ffd203 --- /dev/null +++ b/format/Message.fbs @@ -0,0 +1,183 @@ +namespace apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Logical types and their metadata (if any) +/// +/// These are stored in the flatbuffer in the Type union below + +/// A Tuple in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). We used Tuple here as Struct is +/// a reserved word in Flatbuffers +table Tuple { +} + +table List { +} + +enum UnionMode:int { Sparse, Dense } + +table Union { + mode: UnionMode; +} + +table Bit { +} + +table Int { + bitWidth: int; // 1 to 64 + is_signed: bool; +} + +enum Precision:int {SINGLE, DOUBLE} + +table FloatingPoint { + precision: Precision; +} + +table Utf8 { +} + +table Binary { +} + +table Bool { +} + +table Decimal { + precision: int; + scale: int; +} + +table Timestamp { + timezone: string; +} + +table JSONScalar { + dense:bool=true; +} + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. We can +/// add new logical types to Type without breaking backwards compatibility + +union Type { + Int, + Bit, + FloatingPoint, + Binary, + Utf8, + Bool, + Decimal, + Timestamp, + List, + Tuple, + Union, + JSONScalar +} + +/// ---------------------------------------------------------------------- +/// A field represents a named column in a record / row batch or child of a +/// nested type. +/// +/// - children is only for nested Arrow arrays +/// - For primitive types, children will have length 0 +/// - nullable should default to true in general + +table Field { + // Name is not required, in i.e. a List + name: string; + nullable: bool; + type: Type; + children: [Field]; +} + +/// ---------------------------------------------------------------------- +/// A Schema describes the columns in a row batch + +table Schema { + fields: [Field]; +} + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + +/// A Buffer represents a single contiguous memory segment +struct Buffer { + /// The shared memory page id where this buffer is located. Currently this is + /// not used + page: int; + + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + offset: long; + + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). + length: long; +} + +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +struct FieldNode { + /// The number of value slots in the Arrow array at this level of a nested + /// tree + length: int; + + /// The number of observed nulls. Fields with null_count == 0 may choose not + /// to write their physical null bitmap out as a materialized buffer, instead + /// setting the length of the null buffer to 0. + null_count: int; +} + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". +table RecordBatch { + /// number of records / rows. The arrays in the batch should all have this + /// length + length: int; + + /// Nodes correspond to the pre-ordered flattened logical schema + nodes: [FieldNode]; + + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the null bitmap + /// and 1 for the values. For struct arrays, there will only be a single + /// buffer for the null bitmap + buffers: [Buffer]; +} + +/// ---------------------------------------------------------------------- +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// +/// TODO(wesm): To be documented in more detail + +table DictionaryBatch { + id: long; + data: RecordBatch; +} + +/// ---------------------------------------------------------------------- +/// The root Message type + +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +union MessageHeader { + Schema, DictionaryBatch, RecordBatch +} + +table Message { + header: MessageHeader; + bodyLength: long; +} + +root_type Message; http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/__init__.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 8d93a15..9a08070 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -35,4 +35,6 @@ from pyarrow.schema import (null, bool_, uint8, uint16, uint32, uint64, float_, double, string, list_, struct, field, - DataType, Field, Schema) + DataType, Field, Schema, schema) + +from pyarrow.array import RowBatch http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd index d0d3486..de3c774 100644 --- a/python/pyarrow/array.pxd +++ b/python/pyarrow/array.pxd @@ -16,7 +16,7 @@ # under the License. from pyarrow.includes.common cimport shared_ptr -from pyarrow.includes.libarrow cimport CArray, LogicalType +from pyarrow.includes.libarrow cimport CArray from pyarrow.scalar import NA http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index bceb333..c5d40dd 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -28,6 +28,9 @@ from pyarrow.error cimport check_status cimport pyarrow.scalar as scalar from pyarrow.scalar import NA +from pyarrow.schema cimport Schema +import pyarrow.schema as schema + def total_allocated_bytes(): cdef MemoryPool* pool = pyarrow.GetMemoryPool() return pool.bytes_allocated() @@ -155,12 +158,12 @@ cdef class StringArray(Array): cdef dict _array_classes = { - LogicalType_NA: NullArray, - LogicalType_BOOL: BooleanArray, - LogicalType_INT64: Int64Array, - LogicalType_DOUBLE: DoubleArray, - LogicalType_LIST: ListArray, - LogicalType_STRING: StringArray, + Type_NA: NullArray, + Type_BOOL: BooleanArray, + Type_INT64: Int64Array, + Type_DOUBLE: DoubleArray, + Type_LIST: ListArray, + Type_STRING: StringArray, } cdef object box_arrow_array(const shared_ptr[CArray]& sp_array): @@ -190,3 +193,35 @@ def from_pylist(object list_obj, DataType type=None): raise NotImplementedError return box_arrow_array(sp_array) + +#---------------------------------------------------------------------- +# Table-like data structures + +cdef class RowBatch: + """ + + """ + cdef readonly: + Schema schema + int num_rows + list arrays + + def __cinit__(self, Schema schema, int num_rows, list arrays): + self.schema = schema + self.num_rows = num_rows + self.arrays = arrays + + if len(self.schema) != len(arrays): + raise ValueError('Mismatch number of data arrays and ' + 'schema fields') + + def __len__(self): + return self.num_rows + + property num_columns: + + def __get__(self): + return len(self.arrays) + + def __getitem__(self, i): + return self.arrays[i] http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/libarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index baba112..e6afcbd 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -21,31 +21,30 @@ from pyarrow.includes.common cimport * cdef extern from "arrow/api.h" namespace "arrow" nogil: - enum LogicalType" arrow::LogicalType::type": - LogicalType_NA" arrow::LogicalType::NA" + enum Type" arrow::Type::type": + Type_NA" arrow::Type::NA" - LogicalType_BOOL" arrow::LogicalType::BOOL" + Type_BOOL" arrow::Type::BOOL" - LogicalType_UINT8" arrow::LogicalType::UINT8" - LogicalType_INT8" arrow::LogicalType::INT8" - LogicalType_UINT16" arrow::LogicalType::UINT16" - LogicalType_INT16" arrow::LogicalType::INT16" - LogicalType_UINT32" arrow::LogicalType::UINT32" - LogicalType_INT32" arrow::LogicalType::INT32" - LogicalType_UINT64" arrow::LogicalType::UINT64" - LogicalType_INT64" arrow::LogicalType::INT64" + Type_UINT8" arrow::Type::UINT8" + Type_INT8" arrow::Type::INT8" + Type_UINT16" arrow::Type::UINT16" + Type_INT16" arrow::Type::INT16" + Type_UINT32" arrow::Type::UINT32" + Type_INT32" arrow::Type::INT32" + Type_UINT64" arrow::Type::UINT64" + Type_INT64" arrow::Type::INT64" - LogicalType_FLOAT" arrow::LogicalType::FLOAT" - LogicalType_DOUBLE" arrow::LogicalType::DOUBLE" + Type_FLOAT" arrow::Type::FLOAT" + Type_DOUBLE" arrow::Type::DOUBLE" - LogicalType_STRING" arrow::LogicalType::STRING" + Type_STRING" arrow::Type::STRING" - LogicalType_LIST" arrow::LogicalType::LIST" - LogicalType_STRUCT" arrow::LogicalType::STRUCT" + Type_LIST" arrow::Type::LIST" + Type_STRUCT" arrow::Type::STRUCT" cdef cppclass CDataType" arrow::DataType": - LogicalType type - c_bool nullable + Type type c_bool Equals(const CDataType* other) @@ -55,8 +54,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: int64_t bytes_allocated() cdef cppclass CListType" arrow::ListType"(CDataType): - CListType(const shared_ptr[CDataType]& value_type, - c_bool nullable) + CListType(const shared_ptr[CDataType]& value_type) cdef cppclass CStringType" arrow::StringType"(CDataType): pass @@ -65,21 +63,26 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_string name shared_ptr[CDataType] type - CField(const c_string& name, const shared_ptr[CDataType]& type) + c_bool nullable + + CField(const c_string& name, const shared_ptr[CDataType]& type, + c_bool nullable) cdef cppclass CStructType" arrow::StructType"(CDataType): - CStructType(const vector[shared_ptr[CField]]& fields, - c_bool nullable) + CStructType(const vector[shared_ptr[CField]]& fields) cdef cppclass CSchema" arrow::Schema": - CSchema(const shared_ptr[CField]& fields) + CSchema(const vector[shared_ptr[CField]]& fields) + const shared_ptr[CField]& field(int i) + int num_fields() + c_string ToString() cdef cppclass CArray" arrow::Array": const shared_ptr[CDataType]& type() int32_t length() int32_t null_count() - LogicalType logical_type() + Type type_enum() c_bool IsNull(int i) @@ -122,3 +125,57 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CStringArray" arrow::StringArray"(CListArray): c_string GetString(int i) + + +cdef extern from "arrow/api.h" namespace "arrow" nogil: + # We can later add more of the common status factory methods as needed + cdef CStatus CStatus_OK "Status::OK"() + + cdef cppclass CStatus "arrow::Status": + CStatus() + + c_string ToString() + + c_bool ok() + c_bool IsOutOfMemory() + c_bool IsKeyError() + c_bool IsNotImplemented() + c_bool IsInvalid() + + cdef cppclass Buffer: + uint8_t* data() + int64_t size() + + +cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil: + cdef cppclass SchemaMessage: + int num_fields() + CStatus GetField(int i, shared_ptr[CField]* out) + CStatus GetSchema(shared_ptr[CSchema]* out) + + cdef cppclass FieldMetadata: + pass + + cdef cppclass BufferMetadata: + pass + + cdef cppclass RecordBatchMessage: + pass + + cdef cppclass DictionaryBatchMessage: + pass + + enum MessageType" arrow::ipc::Message::Type": + MessageType_SCHEMA" arrow::ipc::Message::SCHEMA" + MessageType_RECORD_BATCH" arrow::ipc::Message::RECORD_BATCH" + MessageType_DICTIONARY_BATCH" arrow::ipc::Message::DICTIONARY_BATCH" + + cdef cppclass Message: + CStatus Open(const shared_ptr[Buffer]& buf, + shared_ptr[Message]* out) + int64_t body_length() + MessageType type() + + shared_ptr[SchemaMessage] GetSchema() + shared_ptr[RecordBatchMessage] GetRecordBatch() + shared_ptr[DictionaryBatchMessage] GetDictionaryBatch() http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/pyarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd index 9a0c004..eedfc85 100644 --- a/python/pyarrow/includes/pyarrow.pxd +++ b/python/pyarrow/includes/pyarrow.pxd @@ -18,8 +18,7 @@ # distutils: language = c++ from pyarrow.includes.common cimport * -from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType, - MemoryPool) +from pyarrow.includes.libarrow cimport CArray, CDataType, Type, MemoryPool cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil: # We can later add more of the common status factory methods as needed @@ -39,7 +38,7 @@ cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil: c_bool IsNotImplemented() c_bool IsArrowError() - shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable) + shared_ptr[CDataType] GetPrimitiveType(Type type) Status ConvertPySequence(object obj, shared_ptr[CArray]* out) MemoryPool* GetMemoryPool() http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/scalar.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 261a389..04f013d 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -172,18 +172,18 @@ cdef class ListValue(ArrayValue): cdef dict _scalar_classes = { - LogicalType_UINT8: Int8Value, - LogicalType_UINT16: Int16Value, - LogicalType_UINT32: Int32Value, - LogicalType_UINT64: Int64Value, - LogicalType_INT8: Int8Value, - LogicalType_INT16: Int16Value, - LogicalType_INT32: Int32Value, - LogicalType_INT64: Int64Value, - LogicalType_FLOAT: FloatValue, - LogicalType_DOUBLE: DoubleValue, - LogicalType_LIST: ListValue, - LogicalType_STRING: StringValue + Type_UINT8: Int8Value, + Type_UINT16: Int16Value, + Type_UINT32: Int32Value, + Type_UINT64: Int64Value, + Type_INT8: Int8Value, + Type_INT16: Int16Value, + Type_INT32: Int32Value, + Type_INT64: Int64Value, + Type_FLOAT: FloatValue, + Type_DOUBLE: DoubleValue, + Type_LIST: ListValue, + Type_STRING: StringValue } cdef object box_arrow_scalar(DataType type, http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd index 07b9bd0..61458b7 100644 --- a/python/pyarrow/schema.pxd +++ b/python/pyarrow/schema.pxd @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from pyarrow.includes.common cimport shared_ptr +from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport CDataType, CField, CSchema cdef class DataType: @@ -33,9 +33,13 @@ cdef class Field: cdef readonly: DataType type + cdef init(self, const shared_ptr[CField]& field) + cdef class Schema: cdef: shared_ptr[CSchema] sp_schema CSchema* schema + cdef init(self, const vector[shared_ptr[CField]]& fields) + cdef DataType box_data_type(const shared_ptr[CDataType]& type) http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx index ea87872..b3bf02a 100644 --- a/python/pyarrow/schema.pyx +++ b/python/pyarrow/schema.pyx @@ -54,94 +54,153 @@ cdef class DataType: cdef class Field: - def __cinit__(self, object name, DataType type): - self.type = type - self.sp_field.reset(new CField(tobytes(name), type.sp_type)) - self.field = self.sp_field.get() + def __cinit__(self): + pass + + cdef init(self, const shared_ptr[CField]& field): + self.sp_field = field + self.field = field.get() + + @classmethod + def from_py(cls, object name, DataType type, bint nullable=True): + cdef Field result = Field() + result.type = type + result.sp_field.reset(new CField(tobytes(name), type.sp_type, + nullable)) + result.field = result.sp_field.get() + + return result def __repr__(self): return 'Field({0!r}, type={1})'.format(self.name, str(self.type)) + property nullable: + + def __get__(self): + return self.field.nullable + property name: def __get__(self): return frombytes(self.field.name) +cdef class Schema: + + def __cinit__(self): + pass + + def __len__(self): + return self.schema.num_fields() + + def __getitem__(self, i): + if i < 0 or i >= len(self): + raise IndexError("{0} is out of bounds".format(i)) + + cdef Field result = Field() + result.init(self.schema.field(i)) + result.type = box_data_type(result.field.type) + + return result + + cdef init(self, const vector[shared_ptr[CField]]& fields): + self.schema = new CSchema(fields) + self.sp_schema.reset(self.schema) + + @classmethod + def from_fields(cls, fields): + cdef: + Schema result + Field field + vector[shared_ptr[CField]] c_fields + + c_fields.resize(len(fields)) + + for i in range(len(fields)): + field = fields[i] + c_fields[i] = field.sp_field + + result = Schema() + result.init(c_fields) + + return result + + def __repr__(self): + return frombytes(self.schema.ToString()) + cdef dict _type_cache = {} -cdef DataType primitive_type(LogicalType type, bint nullable=True): - if (type, nullable) in _type_cache: - return _type_cache[type, nullable] +cdef DataType primitive_type(Type type): + if type in _type_cache: + return _type_cache[type] cdef DataType out = DataType() - out.init(pyarrow.GetPrimitiveType(type, nullable)) + out.init(pyarrow.GetPrimitiveType(type)) - _type_cache[type, nullable] = out + _type_cache[type] = out return out #------------------------------------------------------------ # Type factory functions -def field(name, type): - return Field(name, type) +def field(name, type, bint nullable=True): + return Field.from_py(name, type, nullable) cdef set PRIMITIVE_TYPES = set([ - LogicalType_NA, LogicalType_BOOL, - LogicalType_UINT8, LogicalType_INT8, - LogicalType_UINT16, LogicalType_INT16, - LogicalType_UINT32, LogicalType_INT32, - LogicalType_UINT64, LogicalType_INT64, - LogicalType_FLOAT, LogicalType_DOUBLE]) + Type_NA, Type_BOOL, + Type_UINT8, Type_INT8, + Type_UINT16, Type_INT16, + Type_UINT32, Type_INT32, + Type_UINT64, Type_INT64, + Type_FLOAT, Type_DOUBLE]) def null(): - return primitive_type(LogicalType_NA) + return primitive_type(Type_NA) -def bool_(c_bool nullable=True): - return primitive_type(LogicalType_BOOL, nullable) +def bool_(): + return primitive_type(Type_BOOL) -def uint8(c_bool nullable=True): - return primitive_type(LogicalType_UINT8, nullable) +def uint8(): + return primitive_type(Type_UINT8) -def int8(c_bool nullable=True): - return primitive_type(LogicalType_INT8, nullable) +def int8(): + return primitive_type(Type_INT8) -def uint16(c_bool nullable=True): - return primitive_type(LogicalType_UINT16, nullable) +def uint16(): + return primitive_type(Type_UINT16) -def int16(c_bool nullable=True): - return primitive_type(LogicalType_INT16, nullable) +def int16(): + return primitive_type(Type_INT16) -def uint32(c_bool nullable=True): - return primitive_type(LogicalType_UINT32, nullable) +def uint32(): + return primitive_type(Type_UINT32) -def int32(c_bool nullable=True): - return primitive_type(LogicalType_INT32, nullable) +def int32(): + return primitive_type(Type_INT32) -def uint64(c_bool nullable=True): - return primitive_type(LogicalType_UINT64, nullable) +def uint64(): + return primitive_type(Type_UINT64) -def int64(c_bool nullable=True): - return primitive_type(LogicalType_INT64, nullable) +def int64(): + return primitive_type(Type_INT64) -def float_(c_bool nullable=True): - return primitive_type(LogicalType_FLOAT, nullable) +def float_(): + return primitive_type(Type_FLOAT) -def double(c_bool nullable=True): - return primitive_type(LogicalType_DOUBLE, nullable) +def double(): + return primitive_type(Type_DOUBLE) -def string(c_bool nullable=True): +def string(): """ UTF8 string """ - return primitive_type(LogicalType_STRING, nullable) + return primitive_type(Type_STRING) -def list_(DataType value_type, c_bool nullable=True): +def list_(DataType value_type): cdef DataType out = DataType() - out.init(shared_ptr[CDataType]( - new CListType(value_type.sp_type, nullable))) + out.init(shared_ptr[CDataType](new CListType(value_type.sp_type))) return out -def struct(fields, c_bool nullable=True): +def struct(fields): """ """ @@ -154,9 +213,11 @@ def struct(fields, c_bool nullable=True): c_fields.push_back(field.sp_field) out.init(shared_ptr[CDataType]( - new CStructType(c_fields, nullable))) + new CStructType(c_fields))) return out +def schema(fields): + return Schema.from_fields(fields) cdef DataType box_data_type(const shared_ptr[CDataType]& type): cdef DataType out = DataType() http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_schema.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 0235526..2894ea8 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -18,6 +18,8 @@ from pyarrow.compat import unittest import pyarrow as arrow +A = arrow + class TestTypes(unittest.TestCase): @@ -28,15 +30,12 @@ class TestTypes(unittest.TestCase): for name in dtypes: factory = getattr(arrow, name) t = factory() - t_required = factory(False) - assert str(t) == name - assert str(t_required) == '{0} not null'.format(name) def test_list(self): value_type = arrow.int32() list_type = arrow.list_(value_type) - assert str(list_type) == 'list' + assert str(list_type) == 'list' def test_string(self): t = arrow.string() @@ -47,5 +46,26 @@ class TestTypes(unittest.TestCase): f = arrow.field('foo', t) assert f.name == 'foo' + assert f.nullable assert f.type is t assert repr(f) == "Field('foo', type=string)" + + f = arrow.field('foo', t, False) + assert not f.nullable + + def test_schema(self): + fields = [ + A.field('foo', A.int32()), + A.field('bar', A.string()), + A.field('baz', A.list_(A.int8())) + ] + sch = A.schema(fields) + + assert len(sch) == 3 + assert sch[0].name == 'foo' + assert sch[0].type == fields[0].type + + assert repr(sch) == """\ +foo: int32 +bar: string +baz: list""" http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_table.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py new file mode 100644 index 0000000..2e24445 --- /dev/null +++ b/python/pyarrow/tests/test_table.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow.compat import unittest +import pyarrow as arrow + +A = arrow + + +class TestRowBatch(unittest.TestCase): + + def test_basics(self): + data = [ + A.from_pylist(range(5)), + A.from_pylist([-10, -5, 0, 5, 10]) + ] + num_rows = 5 + + descr = A.schema([A.field('c0', data[0].type), + A.field('c1', data[1].type)]) + + batch = A.RowBatch(descr, num_rows, data) + + assert len(batch) == num_rows + assert batch.num_rows == num_rows + assert batch.num_columns == len(data) http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/adapters/builtin.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index bb79052..acb13ac 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -27,7 +27,7 @@ using arrow::ArrayBuilder; using arrow::DataType; -using arrow::LogicalType; +using arrow::Type; namespace pyarrow { @@ -356,17 +356,17 @@ class ListConverter : public TypedConverter { // Dynamic constructor for sequence converters std::shared_ptr GetConverter(const std::shared_ptr& type) { switch (type->type) { - case LogicalType::BOOL: + case Type::BOOL: return std::make_shared(); - case LogicalType::INT64: + case Type::INT64: return std::make_shared(); - case LogicalType::DOUBLE: + case Type::DOUBLE: return std::make_shared(); - case LogicalType::STRING: + case Type::STRING: return std::make_shared(); - case LogicalType::LIST: + case Type::LIST: return std::make_shared(); - case LogicalType::STRUCT: + case Type::STRUCT: default: return nullptr; break; @@ -378,7 +378,7 @@ Status ListConverter::Init(const std::shared_ptr& builder) { typed_builder_ = static_cast(builder.get()); value_converter_ = GetConverter(static_cast( - builder->type().get())->value_type); + builder->type().get())->value_type()); if (value_converter_ == nullptr) { return Status::NotImplemented("value type not implemented"); } @@ -393,8 +393,8 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type)); // Handle NA / NullType case - if (type->type == LogicalType::NA) { - out->reset(new arrow::Array(type, size, size)); + if (type->type == Type::NA) { + out->reset(new arrow::NullArray(type, size)); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc index 0921fc4..08003aa 100644 --- a/python/src/pyarrow/helpers.cc +++ b/python/src/pyarrow/helpers.cc @@ -37,19 +37,14 @@ const std::shared_ptr FLOAT = std::make_shared(); const std::shared_ptr DOUBLE = std::make_shared(); const std::shared_ptr STRING = std::make_shared(); -#define GET_PRIMITIVE_TYPE(NAME, Type) \ - case LogicalType::NAME: \ - if (nullable) { \ - return NAME; \ - } else { \ - return std::make_shared(nullable); \ - } \ +#define GET_PRIMITIVE_TYPE(NAME, Class) \ + case Type::NAME: \ + return NAME; \ break; -std::shared_ptr GetPrimitiveType(LogicalType::type type, - bool nullable) { +std::shared_ptr GetPrimitiveType(Type::type type) { switch (type) { - case LogicalType::NA: + case Type::NA: return NA; GET_PRIMITIVE_TYPE(UINT8, UInt8Type); GET_PRIMITIVE_TYPE(INT8, Int8Type); http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.h ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h index e41568d..ec42bb3 100644 --- a/python/src/pyarrow/helpers.h +++ b/python/src/pyarrow/helpers.h @@ -24,7 +24,7 @@ namespace pyarrow { using arrow::DataType; -using arrow::LogicalType; +using arrow::Type; extern const std::shared_ptr NA; extern const std::shared_ptr BOOL; @@ -40,8 +40,7 @@ extern const std::shared_ptr FLOAT; extern const std::shared_ptr DOUBLE; extern const std::shared_ptr STRING; -std::shared_ptr GetPrimitiveType(LogicalType::type type, - bool nullable); +std::shared_ptr GetPrimitiveType(Type::type type); } // namespace pyarrow