arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-37: [C++ / Python] Implement BooleanArray and BooleanBuilder. Handle Python built-in bool
Date Fri, 25 Mar 2016 23:52:21 GMT
Repository: arrow
Updated Branches:
  refs/heads/master c06b7654b -> 0a8979d3a


ARROW-37: [C++ / Python] Implement BooleanArray and BooleanBuilder. Handle Python built-in bool

This patch implements `arrow::Type::BOOL` in bit-packed form.

ARROW-59 (Support for boolean in Python lists) is also addressed here.

Author: Wes McKinney <wesm@apache.org>

Closes #40 from wesm/ARROW-37 and squashes the following commits:

f6b60f1 [Wes McKinney] Clean up template instantiations to fix clang
2d3b855 [Wes McKinney] Use gcc-4.9 to build thirdparty, too
74f704b [Wes McKinney] Try to fix clang issues, make some PrimitiveBuilder methods protected
dc8d0a4 [Wes McKinney] Fix up pyarrow per C++ API changes. Add boolean builtin conversion / API support
2299a6c [Wes McKinney] Install logging.h
2dae07e [Wes McKinney] cpplint
0f55344 [Wes McKinney] Initialize memory to 0 after PoolBuffer::Resize to avoid boolean bit setting issues
83527a9 [Wes McKinney] Draft BooleanBuilder and arrange to share common code between normal numeric builders and boolean builder


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0a8979d3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0a8979d3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0a8979d3

Branch: refs/heads/master
Commit: 0a8979d3ab80374eb4f84d08e060bcec70174c73
Parents: c06b765
Author: Wes McKinney <wesm@apache.org>
Authored: Fri Mar 25 16:52:11 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Fri Mar 25 16:52:11 2016 -0700

----------------------------------------------------------------------
 .travis.yml                                  |   4 +-
 cpp/src/arrow/api.h                          |   1 -
 cpp/src/arrow/array-test.cc                  |   5 +-
 cpp/src/arrow/builder.cc                     |   7 +
 cpp/src/arrow/builder.h                      |   2 +
 cpp/src/arrow/test-util.h                    |  31 ++-
 cpp/src/arrow/type.h                         |  15 +-
 cpp/src/arrow/types/CMakeLists.txt           |   1 -
 cpp/src/arrow/types/boolean.h                |  32 ---
 cpp/src/arrow/types/construct.cc             |   3 +-
 cpp/src/arrow/types/list-test.cc             |   5 +-
 cpp/src/arrow/types/list.h                   |  11 +-
 cpp/src/arrow/types/primitive-test.cc        | 214 ++++++++++-----
 cpp/src/arrow/types/primitive.cc             | 179 ++++++++++++-
 cpp/src/arrow/types/primitive.h              | 302 ++++++++++++++--------
 cpp/src/arrow/types/string-test.cc           |   2 +-
 cpp/src/arrow/util/CMakeLists.txt            |   3 +-
 cpp/src/arrow/util/bit-util.cc               |  14 +-
 cpp/src/arrow/util/bit-util.h                |  13 +-
 python/pyarrow/includes/libarrow.pxd         |   3 +
 python/pyarrow/scalar.pyx                    |   6 +-
 python/pyarrow/tests/test_convert_builtin.py |   5 +-
 python/pyarrow/tests/test_scalars.py         |  39 +--
 python/src/pyarrow/adapters/builtin.cc       |  26 +-
 24 files changed, 643 insertions(+), 280 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 49a956e..d89a200 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,10 +20,10 @@ matrix:
     language: cpp
     os: linux
     before_script:
-    - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
-    script:
     - export CC="gcc-4.9"
     - export CXX="g++-4.9"
+    - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
+    script:
     - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
     - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
   - compiler: clang

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 7be7f88..2ae80f6 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -27,7 +27,6 @@
 #include "arrow/table.h"
 #include "arrow/type.h"
 
-#include "arrow/types/boolean.h"
 #include "arrow/types/construct.h"
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 7c6eaf5..121b802 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -71,8 +71,7 @@ TEST_F(TestArray, TestIsNull) {
     if (x == 0) ++null_count;
   }
 
-  std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(null_bitmap.data(),
-      null_bitmap.size());
+  std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(null_bitmap);
   std::unique_ptr<Array> arr;
   arr.reset(new Int32Array(null_bitmap.size(), nullptr, null_count, null_buf));
 
@@ -82,7 +81,7 @@ TEST_F(TestArray, TestIsNull) {
   ASSERT_TRUE(arr->null_bitmap()->Equals(*null_buf.get()));
 
   for (size_t i = 0; i < null_bitmap.size(); ++i) {
-    EXPECT_EQ(static_cast<bool>(null_bitmap[i]), !arr->IsNull(i)) << i;
+    EXPECT_EQ(null_bitmap[i], !arr->IsNull(i)) << i;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 6a62dc3..4061f35 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -54,5 +54,12 @@ Status ArrayBuilder::Advance(int32_t elements) {
   return Status::OK();
 }
 
+Status ArrayBuilder::Reserve(int32_t elements) {
+  if (length_ + elements > capacity_) {
+    int32_t new_capacity = util::next_power2(length_ + elements);
+    return Resize(new_capacity);
+  }
+  return Status::OK();
+}
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 308e54c..d1a49dc 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -69,6 +69,8 @@ class ArrayBuilder {
   // Resizes the null_bitmap array
   Status Resize(int32_t new_bits);
 
+  Status Reserve(int32_t extra_bits);
+
   // For cases where raw data was memcpy'd into the internal buffers, allows us
   // to advance the length of the builder. It is your responsibility to use
   // this function responsibly.

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index ea3ce5f..b2bce26 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -99,27 +99,26 @@ void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
 
 
 template <typename T>
+void random_real(int n, uint32_t seed, T min_value, T max_value,
+    std::vector<T>* out) {
+  std::mt19937 gen(seed);
+  std::uniform_real_distribution<T> d(min_value, max_value);
+  for (int i = 0; i < n; ++i) {
+    out->push_back(d(gen));
+  }
+}
+
+
+template <typename T>
 std::shared_ptr<Buffer> to_buffer(const std::vector<T>& values) {
   return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values.data()),
       values.size() * sizeof(T));
 }
 
-void random_null_bitmap(int64_t n, double pct_null, std::vector<uint8_t>* null_bitmap) {
-  Random rng(random_seed());
-  for (int i = 0; i < n; ++i) {
-    if (rng.NextDoubleFraction() > pct_null) {
-      null_bitmap->push_back(1);
-    } else {
-      // null
-      null_bitmap->push_back(0);
-    }
-  }
-}
-
-void random_null_bitmap(int64_t n, double pct_null, std::vector<bool>* null_bitmap) {
+void random_null_bitmap(int64_t n, double pct_null, uint8_t* null_bitmap) {
   Random rng(random_seed());
   for (int i = 0; i < n; ++i) {
-    null_bitmap->push_back(rng.NextDoubleFraction() > pct_null);
+    null_bitmap[i] = rng.NextDoubleFraction() > pct_null;
   }
 }
 
@@ -160,11 +159,11 @@ static inline int null_count(const std::vector<uint8_t>& valid_bytes) {
   return result;
 }
 
-std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
+std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>& bytes) {
   std::shared_ptr<Buffer> out;
 
   // TODO(wesm): error checking
-  util::bytes_to_bits(bytes, length, &out);
+  util::bytes_to_bits(bytes, &out);
   return out;
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 5984b67..86e4779 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -132,6 +132,10 @@ struct DataType {
     return children_.size();
   }
 
+  virtual int value_size() const {
+    return -1;
+  }
+
   virtual std::string ToString() const = 0;
 };
 
@@ -191,11 +195,14 @@ inline std::string PrimitiveType<Derived>::ToString() const {
 #define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)  \
   typedef C_TYPE c_type;                                    \
   static constexpr Type::type type_enum = Type::ENUM;       \
-  static constexpr int size = SIZE;                         \
                                                             \
   TYPENAME()                                                \
       : PrimitiveType<TYPENAME>() {}                        \
                                                             \
+  virtual int value_size() const {                          \
+    return SIZE;                                            \
+  }                                                         \
+                                                            \
   static const char* name() {                               \
     return NAME;                                            \
   }
@@ -295,6 +302,12 @@ struct StructType : public DataType {
   std::string ToString() const override;
 };
 
+// These will be defined elsewhere
+template <typename T>
+struct type_traits {
+};
+
+
 } // namespace arrow
 
 #endif  // ARROW_TYPE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 595b3be..f3e4128 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -21,7 +21,6 @@
 
 # Headers: top level
 install(FILES
-  boolean.h
   collection.h
   construct.h
   datetime.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
deleted file mode 100644
index 1cb91f9..0000000
--- a/cpp/src/arrow/types/boolean.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_BOOLEAN_H
-#define ARROW_TYPES_BOOLEAN_H
-
-#include "arrow/types/primitive.h"
-
-namespace arrow {
-
-// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
-
-class BooleanBuilder : public ArrayBuilder {
-};
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_BOOLEAN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index df2317c..34647a5 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -51,7 +51,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     BUILDER_CASE(UINT64, UInt64Builder);
     BUILDER_CASE(INT64, Int64Builder);
 
-    // BUILDER_CASE(BOOL, BooleanBuilder);
+    BUILDER_CASE(BOOL, BooleanBuilder);
 
     BUILDER_CASE(FLOAT, FloatBuilder);
     BUILDER_CASE(DOUBLE, DoubleBuilder);
@@ -83,6 +83,7 @@ Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
     int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap,
     std::shared_ptr<Array>* out) {
   switch (type->type) {
+    MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray);
     MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
     MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
     MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index eb55ca8..4eb560e 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -116,11 +116,14 @@ TEST_F(TestListBuilder, TestBasics) {
 
   Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
 
+  EXPECT_OK(builder_->Reserve(lengths.size()));
+  EXPECT_OK(vb->Reserve(values.size()));
+
   int pos = 0;
   for (size_t i = 0; i < lengths.size(); ++i) {
     ASSERT_OK(builder_->Append(is_null[i] > 0));
     for (int j = 0; j < lengths[i]; ++j) {
-      ASSERT_OK(vb->Append(values[pos++]));
+      vb->Append(values[pos++]);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 72e20e9..8073b51 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -116,7 +116,8 @@ class ListBuilder : public Int32Builder {
       int32_t new_capacity = util::next_power2(length_ + length);
       RETURN_NOT_OK(Resize(new_capacity));
     }
-    memcpy(raw_buffer() + length_, values, length * elsize_);
+    memcpy(raw_data_ + length_, values,
+        type_traits<Int32Type>::bytes_required(length));
 
     if (valid_bytes != nullptr) {
       AppendNulls(valid_bytes, length);
@@ -132,13 +133,13 @@ class ListBuilder : public Int32Builder {
 
     // Add final offset if the length is non-zero
     if (length_) {
-      raw_buffer()[length_] = items->length();
+      raw_data_[length_] = items->length();
     }
 
-    auto result = std::make_shared<Container>(type_, length_, values_, items,
+    auto result = std::make_shared<Container>(type_, length_, data_, items,
         null_count_, null_bitmap_);
 
-    values_ = null_bitmap_ = nullptr;
+    data_ = null_bitmap_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
 
     return result;
@@ -162,7 +163,7 @@ class ListBuilder : public Int32Builder {
     } else {
       util::set_bit(null_bitmap_data_, length_);
     }
-    raw_buffer()[length_++] = value_builder_->length();
+    raw_data_[length_++] = value_builder_->length();
     return Status::OK();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 10ba113..761845d 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -69,11 +69,11 @@ PRIMITIVE_TEST(BooleanType, BOOL, "bool");
 // ----------------------------------------------------------------------
 // Primitive type tests
 
-TEST_F(TestBuilder, TestResize) {
+TEST_F(TestBuilder, TestReserve) {
   builder_->Init(10);
   ASSERT_EQ(2, builder_->null_bitmap()->size());
 
-  builder_->Resize(30);
+  builder_->Reserve(30);
   ASSERT_EQ(4, builder_->null_bitmap()->size());
 }
 
@@ -83,8 +83,9 @@ class TestPrimitiveBuilder : public TestBuilder {
   typedef typename Attrs::ArrayType ArrayType;
   typedef typename Attrs::BuilderType BuilderType;
   typedef typename Attrs::T T;
+  typedef typename Attrs::Type Type;
 
-  void SetUp() {
+  virtual void SetUp() {
     TestBuilder::SetUp();
 
     type_ = Attrs::type();
@@ -99,58 +100,44 @@ class TestPrimitiveBuilder : public TestBuilder {
 
   void RandomData(int N, double pct_null = 0.1) {
     Attrs::draw(N, &draws_);
-    test::random_null_bitmap(N, pct_null, &valid_bytes_);
+
+    valid_bytes_.resize(N);
+    test::random_null_bitmap(N, pct_null, valid_bytes_.data());
   }
 
-  void CheckNullable() {
-    int size = builder_->length();
+  void Check(const std::shared_ptr<BuilderType>& builder, bool nullable) {
+    int size = builder->length();
 
-    auto ex_data = std::make_shared<Buffer>(
-        reinterpret_cast<uint8_t*>(draws_.data()),
+    auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
         size * sizeof(T));
 
-    auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_.data(), size);
-    int32_t ex_null_count = test::null_count(valid_bytes_);
+    std::shared_ptr<Buffer> ex_null_bitmap;
+    int32_t ex_null_count = 0;
+
+    if (nullable) {
+      ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+      ex_null_count = test::null_count(valid_bytes_);
+    } else {
+      ex_null_bitmap = nullptr;
+    }
 
     auto expected = std::make_shared<ArrayType>(size, ex_data, ex_null_count,
         ex_null_bitmap);
-
     std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
-        builder_->Finish());
+        builder->Finish());
 
     // Builder is now reset
-    ASSERT_EQ(0, builder_->length());
-    ASSERT_EQ(0, builder_->capacity());
-    ASSERT_EQ(0, builder_->null_count());
-    ASSERT_EQ(nullptr, builder_->buffer());
+    ASSERT_EQ(0, builder->length());
+    ASSERT_EQ(0, builder->capacity());
+    ASSERT_EQ(0, builder->null_count());
+    ASSERT_EQ(nullptr, builder->data());
 
     ASSERT_EQ(ex_null_count, result->null_count());
     ASSERT_TRUE(result->EqualsExact(*expected.get()));
   }
 
-  void CheckNonNullable() {
-    int size = builder_nn_->length();
-
-    auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
-        size * sizeof(T));
-
-    auto expected = std::make_shared<ArrayType>(size, ex_data);
-
-    std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
-        builder_nn_->Finish());
-
-    // Builder is now reset
-    ASSERT_EQ(0, builder_nn_->length());
-    ASSERT_EQ(0, builder_nn_->capacity());
-    ASSERT_EQ(nullptr, builder_nn_->buffer());
-
-    ASSERT_TRUE(result->EqualsExact(*expected.get()));
-    ASSERT_EQ(0, result->null_count());
-  }
-
  protected:
-  TypePtr type_;
-  TypePtr type_nn_;
+  std::shared_ptr<DataType> type_;
   shared_ptr<BuilderType> builder_;
   shared_ptr<BuilderType> builder_nn_;
 
@@ -158,14 +145,14 @@ class TestPrimitiveBuilder : public TestBuilder {
   vector<uint8_t> valid_bytes_;
 };
 
-#define PTYPE_DECL(CapType, c_type)             \
-  typedef CapType##Array ArrayType;             \
-  typedef CapType##Builder BuilderType;         \
-  typedef CapType##Type Type;                   \
-  typedef c_type T;                             \
-                                                \
-  static TypePtr type() {                       \
-    return TypePtr(new Type());                 \
+#define PTYPE_DECL(CapType, c_type)                 \
+  typedef CapType##Array ArrayType;                 \
+  typedef CapType##Builder BuilderType;             \
+  typedef CapType##Type Type;                       \
+  typedef c_type T;                                 \
+                                                    \
+  static std::shared_ptr<DataType> type() {         \
+    return std::shared_ptr<DataType>(new Type());   \
   }
 
 #define PINT_DECL(CapType, c_type, LOWER, UPPER)    \
@@ -176,6 +163,14 @@ class TestPrimitiveBuilder : public TestBuilder {
     }                                               \
   }
 
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)      \
+  struct P##CapType {                                   \
+    PTYPE_DECL(CapType, c_type);                        \
+    static void draw(int N, vector<T>* draws) {         \
+      test::random_real<T>(N, 0, LOWER, UPPER, draws);  \
+    }                                                   \
+  }
+
 PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
 PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
 PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
@@ -186,25 +181,89 @@ PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
 PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
 PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
 
-typedef ::testing::Types<PUInt8, PUInt16, PUInt32, PUInt64,
-                         PInt8, PInt16, PInt32, PInt64> Primitives;
+PFLOAT_DECL(Float, float, -1000, 1000);
+PFLOAT_DECL(Double, double, -1000, 1000);
+
+struct PBoolean {
+  PTYPE_DECL(Boolean, uint8_t);
+};
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::RandomData(int N, double pct_null) {
+  draws_.resize(N);
+  valid_bytes_.resize(N);
+
+  test::random_null_bitmap(N, 0.5, draws_.data());
+  test::random_null_bitmap(N, pct_null, valid_bytes_.data());
+}
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::Check(
+    const std::shared_ptr<BooleanBuilder>& builder, bool nullable) {
+  int size = builder->length();
+
+  auto ex_data = test::bytes_to_null_buffer(draws_);
+
+  std::shared_ptr<Buffer> ex_null_bitmap;
+  int32_t ex_null_count = 0;
+
+  if (nullable) {
+    ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+    ex_null_count = test::null_count(valid_bytes_);
+  } else {
+    ex_null_bitmap = nullptr;
+  }
+
+  auto expected = std::make_shared<BooleanArray>(size, ex_data, ex_null_count,
+      ex_null_bitmap);
+  std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(
+      builder->Finish());
+
+  // Builder is now reset
+  ASSERT_EQ(0, builder->length());
+  ASSERT_EQ(0, builder->capacity());
+  ASSERT_EQ(0, builder->null_count());
+  ASSERT_EQ(nullptr, builder->data());
+
+  ASSERT_EQ(ex_null_count, result->null_count());
+
+  ASSERT_EQ(expected->length(), result->length());
+
+  for (int i = 0; i < result->length(); ++i) {
+    if (nullable) {
+      ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i;
+    }
+    bool actual = util::get_bit(result->raw_data(), i);
+    ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
+  }
+  ASSERT_TRUE(result->EqualsExact(*expected.get()));
+}
+
+typedef ::testing::Types<PBoolean,
+                         PUInt8, PUInt16, PUInt32, PUInt64,
+                         PInt8, PInt16, PInt32, PInt64,
+                         PFloat, PDouble> Primitives;
 
 TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
 
 #define DECL_T()                                \
   typedef typename TestFixture::T T;
 
+#define DECL_TYPE()                             \
+  typedef typename TestFixture::Type Type;
+
 #define DECL_ARRAYTYPE()                                \
   typedef typename TestFixture::ArrayType ArrayType;
 
 
 TYPED_TEST(TestPrimitiveBuilder, TestInit) {
-  DECL_T();
+  DECL_TYPE();
 
   int n = 1000;
-  ASSERT_OK(this->builder_->Init(n));
-  ASSERT_EQ(n, this->builder_->capacity());
-  ASSERT_EQ(n * sizeof(T), this->builder_->buffer()->size());
+  ASSERT_OK(this->builder_->Reserve(n));
+  ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
+  ASSERT_EQ(util::next_power2(type_traits<Type>::bytes_required(n)),
+      this->builder_->data()->size());
 
   // unsure if this should go in all builder classes
   ASSERT_EQ(0, this->builder_->num_children());
@@ -235,12 +294,14 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
 
   this->RandomData(size);
 
+  this->builder_->Reserve(size);
+
   int i;
   for (i = 0; i < size; ++i) {
     if (valid_bytes[i] > 0) {
-      ASSERT_OK(this->builder_->Append(draws[i]));
+      this->builder_->Append(draws[i]);
     } else {
-      ASSERT_OK(this->builder_->AppendNull());
+      this->builder_->AppendNull();
     }
   }
 
@@ -261,31 +322,41 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
 
   this->RandomData(size);
 
+  this->builder_->Reserve(1000);
+  this->builder_nn_->Reserve(1000);
+
   int i;
+  int null_count = 0;
   // Append the first 1000
   for (i = 0; i < 1000; ++i) {
     if (valid_bytes[i] > 0) {
-      ASSERT_OK(this->builder_->Append(draws[i]));
+      this->builder_->Append(draws[i]);
     } else {
-      ASSERT_OK(this->builder_->AppendNull());
+      this->builder_->AppendNull();
+      ++null_count;
     }
-    ASSERT_OK(this->builder_nn_->Append(draws[i]));
+    this->builder_nn_->Append(draws[i]);
   }
 
+  ASSERT_EQ(null_count, this->builder_->null_count());
+
   ASSERT_EQ(1000, this->builder_->length());
   ASSERT_EQ(1024, this->builder_->capacity());
 
   ASSERT_EQ(1000, this->builder_nn_->length());
   ASSERT_EQ(1024, this->builder_nn_->capacity());
 
+  this->builder_->Reserve(size - 1000);
+  this->builder_nn_->Reserve(size - 1000);
+
   // Append the next 9000
   for (i = 1000; i < size; ++i) {
     if (valid_bytes[i] > 0) {
-      ASSERT_OK(this->builder_->Append(draws[i]));
+      this->builder_->Append(draws[i]);
     } else {
-      ASSERT_OK(this->builder_->AppendNull());
+      this->builder_->AppendNull();
     }
-    ASSERT_OK(this->builder_nn_->Append(draws[i]));
+    this->builder_nn_->Append(draws[i]);
   }
 
   ASSERT_EQ(size, this->builder_->length());
@@ -294,8 +365,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
   ASSERT_EQ(size, this->builder_nn_->length());
   ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
 
-  this->CheckNullable();
-  this->CheckNonNullable();
+  this->Check(this->builder_, true);
+  this->Check(this->builder_nn_, false);
 }
 
 
@@ -327,31 +398,34 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
   ASSERT_EQ(size, this->builder_->length());
   ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
 
-  this->CheckNullable();
-  this->CheckNonNullable();
+  this->Check(this->builder_, true);
+  this->Check(this->builder_nn_, false);
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
   int n = 1000;
-  ASSERT_OK(this->builder_->Init(n));
+  ASSERT_OK(this->builder_->Reserve(n));
 
   ASSERT_OK(this->builder_->Advance(100));
   ASSERT_EQ(100, this->builder_->length());
 
   ASSERT_OK(this->builder_->Advance(900));
-  ASSERT_RAISES(Invalid, this->builder_->Advance(1));
+
+  int too_many = this->builder_->capacity() - 1000 + 1;
+  ASSERT_RAISES(Invalid, this->builder_->Advance(too_many));
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestResize) {
-  DECL_T();
+  DECL_TYPE();
 
   int cap = MIN_BUILDER_CAPACITY * 2;
 
-  ASSERT_OK(this->builder_->Resize(cap));
+  ASSERT_OK(this->builder_->Reserve(cap));
   ASSERT_EQ(cap, this->builder_->capacity());
 
-  ASSERT_EQ(cap * sizeof(T), this->builder_->buffer()->size());
-  ASSERT_EQ(util::ceil_byte(cap) / 8, this->builder_->null_bitmap()->size());
+  ASSERT_EQ(type_traits<Type>::bytes_required(cap), this->builder_->data()->size());
+  ASSERT_EQ(util::bytes_for_bits(cap),
+      this->builder_->null_bitmap()->size());
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestReserve) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index ecd5d68..c54d075 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -20,20 +20,20 @@
 #include <memory>
 
 #include "arrow/util/buffer.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
 // ----------------------------------------------------------------------
 // Primitive array base
 
-PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, int value_size,
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
     const std::shared_ptr<Buffer>& data,
     int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap) :
     Array(type, length, null_count, null_bitmap) {
   data_ = data;
   raw_data_ = data == nullptr? nullptr : data_->data();
-  value_size_ = value_size;
 }
 
 bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
@@ -52,12 +52,15 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
     const uint8_t* this_data = raw_data_;
     const uint8_t* other_data = other.raw_data_;
 
+    int value_size = type_->value_size();
+    DCHECK_GT(value_size, 0);
+
     for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && memcmp(this_data, other_data, value_size_)) {
+      if (!IsNull(i) && memcmp(this_data, other_data, value_size)) {
         return false;
       }
-      this_data += value_size_;
-      other_data += value_size_;
+      this_data += value_size;
+      other_data += value_size;
     }
     return true;
   } else {
@@ -73,4 +76,170 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
   return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
 }
 
+template <typename T>
+Status PrimitiveBuilder<T>::Init(int32_t capacity) {
+  RETURN_NOT_OK(ArrayBuilder::Init(capacity));
+  data_ = std::make_shared<PoolBuffer>(pool_);
+
+  int64_t nbytes = type_traits<T>::bytes_required(capacity);
+  RETURN_NOT_OK(data_->Resize(nbytes));
+  memset(data_->mutable_data(), 0, nbytes);
+
+  raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+  return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
+  // XXX: Set floor size for now
+  if (capacity < MIN_BUILDER_CAPACITY) {
+    capacity = MIN_BUILDER_CAPACITY;
+  }
+
+  if (capacity_ == 0) {
+    RETURN_NOT_OK(Init(capacity));
+  } else {
+    RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
+
+    int64_t old_bytes = data_->size();
+    int64_t new_bytes = type_traits<T>::bytes_required(capacity);
+    RETURN_NOT_OK(data_->Resize(new_bytes));
+    raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+
+    memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
+  }
+  capacity_ = capacity;
+  return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Reserve(int32_t elements) {
+  if (length_ + elements > capacity_) {
+    int32_t new_capacity = util::next_power2(length_ + elements);
+    return Resize(new_capacity);
+  }
+  return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Append(const value_type* values, int32_t length,
+    const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(PrimitiveBuilder<T>::Reserve(length));
+
+  if (length > 0) {
+    memcpy(raw_data_ + length_, values, type_traits<T>::bytes_required(length));
+  }
+
+  if (valid_bytes != nullptr) {
+    PrimitiveBuilder<T>::AppendNulls(valid_bytes, length);
+  } else {
+    for (int i = 0; i < length; ++i) {
+      util::set_bit(null_bitmap_data_, length_ + i);
+    }
+  }
+
+  length_ += length;
+  return Status::OK();
+}
+
+template <typename T>
+void PrimitiveBuilder<T>::AppendNulls(const uint8_t* valid_bytes, int32_t length) {
+  // If valid_bytes is all not null, then none of the values are null
+  for (int i = 0; i < length; ++i) {
+    if (valid_bytes[i] == 0) {
+      ++null_count_;
+    } else {
+      util::set_bit(null_bitmap_data_, length_ + i);
+    }
+  }
+}
+
+template <typename T>
+std::shared_ptr<Array> PrimitiveBuilder<T>::Finish() {
+  std::shared_ptr<Array> result = std::make_shared<
+    typename type_traits<T>::ArrayType>(
+        type_, length_, data_, null_count_, null_bitmap_);
+
+  data_ = null_bitmap_ = nullptr;
+  capacity_ = length_ = null_count_ = 0;
+  return result;
+}
+
+template <>
+Status PrimitiveBuilder<BooleanType>::Append(const uint8_t* values, int32_t length,
+    const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+
+  for (int i = 0; i < length; ++i) {
+    if (values[i] > 0) {
+      util::set_bit(raw_data_, length_ + i);
+    } else {
+      util::clear_bit(raw_data_, length_ + i);
+    }
+  }
+
+  if (valid_bytes != nullptr) {
+    PrimitiveBuilder<BooleanType>::AppendNulls(valid_bytes, length);
+  } else {
+    for (int i = 0; i < length; ++i) {
+      util::set_bit(null_bitmap_data_, length_ + i);
+    }
+  }
+  length_ += length;
+  return Status::OK();
+}
+
+template class PrimitiveBuilder<UInt8Type>;
+template class PrimitiveBuilder<UInt16Type>;
+template class PrimitiveBuilder<UInt32Type>;
+template class PrimitiveBuilder<UInt64Type>;
+template class PrimitiveBuilder<Int8Type>;
+template class PrimitiveBuilder<Int16Type>;
+template class PrimitiveBuilder<Int32Type>;
+template class PrimitiveBuilder<Int64Type>;
+template class PrimitiveBuilder<FloatType>;
+template class PrimitiveBuilder<DoubleType>;
+template class PrimitiveBuilder<BooleanType>;
+
+BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap) :
+    PrimitiveArray(std::make_shared<BooleanType>(), length,
+        data, null_count, null_bitmap) {}
+
+bool BooleanArray::EqualsExact(const BooleanArray& other) const {
+  if (this == &other) return true;
+  if (null_count_ != other.null_count_) {
+    return false;
+  }
+
+  if (null_count_ > 0) {
+    bool equal_bitmap = null_bitmap_->Equals(*other.null_bitmap_,
+        util::bytes_for_bits(length_));
+    if (!equal_bitmap) {
+      return false;
+    }
+
+    const uint8_t* this_data = raw_data_;
+    const uint8_t* other_data = other.raw_data_;
+
+    for (int i = 0; i < length_; ++i) {
+      if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data, i)) {
+        return false;
+      }
+    }
+    return true;
+  } else {
+    return data_->Equals(*other.data_, util::bytes_for_bits(length_));
+  }
+}
+
+bool BooleanArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  if (Type::BOOL != arr->type_enum()) {
+    return false;
+  }
+  return EqualsExact(*static_cast<const BooleanArray*>(arr.get()));
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 4eaff43..ec6fee3 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -21,6 +21,7 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
+#include <vector>
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
@@ -37,7 +38,7 @@ class MemoryPool;
 // Base class for fixed-size logical types
 class PrimitiveArray : public Array {
  public:
-  PrimitiveArray(const TypePtr& type, int32_t length, int value_size,
+  PrimitiveArray(const TypePtr& type, int32_t length,
       const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -51,25 +52,19 @@ class PrimitiveArray : public Array {
  protected:
   std::shared_ptr<Buffer> data_;
   const uint8_t* raw_data_;
-  int value_size_;
 };
 
 #define NUMERIC_ARRAY_DECL(NAME, TypeClass, T)                  \
 class NAME : public PrimitiveArray {                            \
  public:                                                        \
   using value_type = T;                                         \
-  NAME(const TypePtr& type, int32_t length,                     \
-      const std::shared_ptr<Buffer>& data,                      \
-      int32_t null_count = 0,                                   \
-      const std::shared_ptr<Buffer>& null_bitmap = nullptr) :   \
-      PrimitiveArray(std::make_shared<TypeClass>(), length,     \
-          sizeof(T), data, null_count, null_bitmap) {}          \
+  using PrimitiveArray::PrimitiveArray;                         \
                                                                 \
   NAME(int32_t length, const std::shared_ptr<Buffer>& data,     \
       int32_t null_count = 0,                                   \
       const std::shared_ptr<Buffer>& null_bitmap = nullptr) :   \
       PrimitiveArray(std::make_shared<TypeClass>(), length,     \
-          sizeof(T), data, null_count, null_bitmap) {}          \
+          data, null_count, null_bitmap) {}                     \
                                                                 \
   bool EqualsExact(const NAME& other) const {                   \
     return PrimitiveArray::EqualsExact(                         \
@@ -96,148 +91,241 @@ NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
 NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
 NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
 
-template <typename Type, typename ArrayType>
+template <typename Type>
 class PrimitiveBuilder : public ArrayBuilder {
  public:
   typedef typename Type::c_type value_type;
 
   explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
       ArrayBuilder(pool, type),
-      values_(nullptr) {
-    elsize_ = sizeof(value_type);
-  }
+      data_(nullptr) {}
 
   virtual ~PrimitiveBuilder() {}
 
-  Status Resize(int32_t capacity) {
-    // XXX: Set floor size for now
-    if (capacity < MIN_BUILDER_CAPACITY) {
-      capacity = MIN_BUILDER_CAPACITY;
-    }
-
-    if (capacity_ == 0) {
-      RETURN_NOT_OK(Init(capacity));
-    } else {
-      RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
-      RETURN_NOT_OK(values_->Resize(capacity * elsize_));
-    }
-    capacity_ = capacity;
-    return Status::OK();
-  }
-
-  Status Init(int32_t capacity) {
-    RETURN_NOT_OK(ArrayBuilder::Init(capacity));
-    values_ = std::make_shared<PoolBuffer>(pool_);
-    return values_->Resize(capacity * elsize_);
-  }
-
-  Status Reserve(int32_t elements) {
-    if (length_ + elements > capacity_) {
-      int32_t new_capacity = util::next_power2(length_ + elements);
-      return Resize(new_capacity);
-    }
-    return Status::OK();
-  }
+  using ArrayBuilder::Advance;
 
-  Status Advance(int32_t elements) {
-    return ArrayBuilder::Advance(elements);
-  }
+  // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+  void AppendNulls(const uint8_t* valid_bytes, int32_t length);
 
-  // Scalar append
-  Status Append(value_type val) {
+  Status AppendNull() {
     if (length_ == capacity_) {
       // If the capacity was not already a multiple of 2, do so here
       RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
     }
-    util::set_bit(null_bitmap_data_, length_);
-    raw_buffer()[length_++] = val;
+    ++null_count_;
+    ++length_;
     return Status::OK();
   }
 
+  std::shared_ptr<Buffer> data() const {
+    return data_;
+  }
+
   // Vector append
   //
   // If passed, valid_bytes is of equal length to values, and any zero byte
   // will be considered as a null for that slot
   Status Append(const value_type* values, int32_t length,
-      const uint8_t* valid_bytes = nullptr) {
-    if (length_ + length > capacity_) {
-      int32_t new_capacity = util::next_power2(length_ + length);
-      RETURN_NOT_OK(Resize(new_capacity));
-    }
-    if (length > 0) {
-      memcpy(raw_buffer() + length_, values, length * elsize_);
-    }
+      const uint8_t* valid_bytes = nullptr);
 
-    if (valid_bytes != nullptr) {
-      AppendNulls(valid_bytes, length);
-    } else {
-      for (int i = 0; i < length; ++i) {
-        util::set_bit(null_bitmap_data_, length_ + i);
-      }
-    }
+  // Ensure that builder can accommodate an additional number of
+  // elements. Resizes if the current capacity is not sufficient
+  Status Reserve(int32_t elements);
 
-    length_ += length;
-    return Status::OK();
+  std::shared_ptr<Array> Finish() override;
+
+ protected:
+  std::shared_ptr<PoolBuffer> data_;
+  value_type* raw_data_;
+
+  Status Init(int32_t capacity);
+
+  // Increase the capacity of the builder to accommodate at least the indicated
+  // number of elements
+  Status Resize(int32_t capacity);
+};
+
+template <typename T>
+class NumericBuilder : public PrimitiveBuilder<T> {
+ public:
+  using typename PrimitiveBuilder<T>::value_type;
+  using PrimitiveBuilder<T>::PrimitiveBuilder;
+
+  using PrimitiveBuilder<T>::Append;
+
+  // Scalar append. Does not capacity-check; make sure to call Reserve beforehand
+  void Append(value_type val) {
+    util::set_bit(null_bitmap_data_, length_);
+    raw_data_[length_++] = val;
   }
 
-  // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
-  void AppendNulls(const uint8_t* valid_bytes, int32_t length) {
-    // If valid_bytes is all not null, then none of the values are null
-    for (int i = 0; i < length; ++i) {
-      if (valid_bytes[i] == 0) {
-        ++null_count_;
-      } else {
-        util::set_bit(null_bitmap_data_, length_ + i);
-      }
-    }
+ protected:
+  using PrimitiveBuilder<T>::length_;
+  using PrimitiveBuilder<T>::null_bitmap_data_;
+  using PrimitiveBuilder<T>::raw_data_;
+
+  using PrimitiveBuilder<T>::Init;
+  using PrimitiveBuilder<T>::Resize;
+};
+
+template <>
+struct type_traits<UInt8Type> {
+  typedef UInt8Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements;
   }
+};
 
-  Status AppendNull() {
-    if (length_ == capacity_) {
-      // If the capacity was not already a multiple of 2, do so here
-      RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
-    }
-    ++null_count_;
-    ++length_;
-    return Status::OK();
+template <>
+struct type_traits<Int8Type> {
+  typedef Int8Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements;
   }
+};
 
-  std::shared_ptr<Array> Finish() override {
-    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>(
-        type_, length_, values_, null_count_, null_bitmap_);
+template <>
+struct type_traits<UInt16Type> {
+  typedef UInt16Array ArrayType;
 
-    values_ = null_bitmap_ = nullptr;
-    capacity_ = length_ = null_count_ = 0;
-    return result;
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(uint16_t);
   }
+};
+
+template <>
+struct type_traits<Int16Type> {
+  typedef Int16Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(int16_t);
+  }
+};
 
-  value_type* raw_buffer() {
-    return reinterpret_cast<value_type*>(values_->mutable_data());
+template <>
+struct type_traits<UInt32Type> {
+  typedef UInt32Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(uint32_t);
   }
+};
 
-  std::shared_ptr<Buffer> buffer() const {
-    return values_;
+template <>
+struct type_traits<Int32Type> {
+  typedef Int32Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(int32_t);
   }
+};
 
- protected:
-  std::shared_ptr<PoolBuffer> values_;
-  int elsize_;
+template <>
+struct type_traits<UInt64Type> {
+  typedef UInt64Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(uint64_t);
+  }
+};
+
+template <>
+struct type_traits<Int64Type> {
+  typedef Int64Array ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(int64_t);
+  }
+};
+template <>
+struct type_traits<FloatType> {
+  typedef FloatArray ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(float);
+  }
+};
+
+template <>
+struct type_traits<DoubleType> {
+  typedef DoubleArray ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return elements * sizeof(double);
+  }
 };
 
 // Builders
 
-typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
-typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
-typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
-typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
+typedef NumericBuilder<UInt8Type> UInt8Builder;
+typedef NumericBuilder<UInt16Type> UInt16Builder;
+typedef NumericBuilder<UInt32Type> UInt32Builder;
+typedef NumericBuilder<UInt64Type> UInt64Builder;
+
+typedef NumericBuilder<Int8Type> Int8Builder;
+typedef NumericBuilder<Int16Type> Int16Builder;
+typedef NumericBuilder<Int32Type> Int32Builder;
+typedef NumericBuilder<Int64Type> Int64Builder;
+
+typedef NumericBuilder<FloatType> FloatBuilder;
+typedef NumericBuilder<DoubleType> DoubleBuilder;
+
 
-typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
-typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
-typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
-typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
+class BooleanArray : public PrimitiveArray {
+ public:
+  using PrimitiveArray::PrimitiveArray;
+
+  BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+      int32_t null_count = 0,
+      const std::shared_ptr<Buffer>& null_bitmap = nullptr);
+
+  bool EqualsExact(const BooleanArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+
+  const uint8_t* raw_data() const {
+    return reinterpret_cast<const uint8_t*>(raw_data_);
+  }
+
+  bool Value(int i) const {
+    return util::get_bit(raw_data(), i);
+  }
+};
+
+template <>
+struct type_traits<BooleanType> {
+  typedef BooleanArray ArrayType;
+
+  static inline int bytes_required(int elements) {
+    return util::bytes_for_bits(elements);
+  }
+};
+
+class BooleanBuilder : public PrimitiveBuilder<BooleanType> {
+ public:
+  explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) :
+      PrimitiveBuilder<BooleanType>(pool, type) {}
+
+  virtual ~BooleanBuilder() {}
+
+  using PrimitiveBuilder<BooleanType>::Append;
+
+  // Scalar append
+  void Append(bool val) {
+    util::set_bit(null_bitmap_data_, length_);
+    if (val) {
+      util::set_bit(raw_data_, length_);
+    } else {
+      util::clear_bit(raw_data_, length_);
+    }
+    ++length_;
+  }
 
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
+  void Append(uint8_t val) {
+    Append(static_cast<bool>(val));
+  }
+};
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index b329b4f..d3a4cc3 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -92,7 +92,7 @@ class TestStringContainer : public ::testing::Test  {
 
     offsets_buf_ = test::to_buffer(offsets_);
 
-    null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_.data(), valid_bytes_.size());
+    null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);
 
     strings_ = std::make_shared<StringArray>(length_, offsets_buf_, values_,

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index fed05e3..d2a4b09 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -23,6 +23,7 @@
 install(FILES
   bit-util.h
   buffer.h
+  logging.h
   macros.h
   memory-pool.h
   status.h
@@ -59,7 +60,7 @@ if (ARROW_BUILD_BENCHMARKS)
     )
   else()
 	  target_link_libraries(arrow_benchmark_main
-      benchmark 
+      benchmark
       pthread
 	  )
   endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 292cb33..6c6d533 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <cstring>
+#include <vector>
 
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -23,25 +24,24 @@
 
 namespace arrow {
 
-void util::bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits) {
-  for (int i = 0; i < length; ++i) {
-    if (static_cast<bool>(bytes[i])) {
+void util::bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits) {
+  for (size_t i = 0; i < bytes.size(); ++i) {
+    if (bytes[i] > 0) {
       set_bit(bits, i);
     }
   }
 }
 
-Status util::bytes_to_bits(uint8_t* bytes, int length,
+Status util::bytes_to_bits(const std::vector<uint8_t>& bytes,
     std::shared_ptr<Buffer>* out) {
-  int bit_length = ceil_byte(length) / 8;
+  int bit_length = util::bytes_for_bits(bytes.size());
 
   auto buffer = std::make_shared<PoolBuffer>();
   RETURN_NOT_OK(buffer->Resize(bit_length));
   memset(buffer->mutable_data(), 0, bit_length);
-  bytes_to_bits(bytes, length, buffer->mutable_data());
+  bytes_to_bits(bytes, buffer->mutable_data());
 
   *out = buffer;
-
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 08222d5..8d62871 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -20,6 +20,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <vector>
 
 namespace arrow {
 
@@ -43,15 +44,19 @@ static inline int64_t ceil_2bytes(int64_t size) {
 static constexpr uint8_t BITMASK[] = {1, 2, 4, 8, 16, 32, 64, 128};
 
 static inline bool get_bit(const uint8_t* bits, int i) {
-  return bits[i / 8] & BITMASK[i % 8];
+  return static_cast<bool>(bits[i / 8] & BITMASK[i % 8]);
 }
 
 static inline bool bit_not_set(const uint8_t* bits, int i) {
   return (bits[i / 8] & BITMASK[i % 8]) == 0;
 }
 
+static inline void clear_bit(uint8_t* bits, int i) {
+  bits[i / 8] &= ~BITMASK[i % 8];
+}
+
 static inline void set_bit(uint8_t* bits, int i) {
-  bits[i / 8] |= 1 << (i % 8);
+  bits[i / 8] |= BITMASK[i % 8];
 }
 
 static inline int64_t next_power2(int64_t n) {
@@ -66,8 +71,8 @@ static inline int64_t next_power2(int64_t n) {
   return n;
 }
 
-void bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits);
-Status bytes_to_bits(uint8_t*, int, std::shared_ptr<Buffer>*);
+void bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits);
+Status bytes_to_bits(const std::vector<uint8_t>&, std::shared_ptr<Buffer>*);
 
 } // namespace util
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index e6afcbd..943a08f 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -86,6 +86,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
         c_bool IsNull(int i)
 
+    cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray):
+        c_bool Value(int i)
+
     cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
         uint8_t Value(int i)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 04f013d..0d391e5 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -58,7 +58,10 @@ cdef class ArrayValue(Scalar):
 
 
 cdef class BooleanValue(ArrayValue):
-    pass
+
+    def as_py(self):
+        cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
+        return ap.Value(self.index)
 
 
 cdef class Int8Value(ArrayValue):
@@ -172,6 +175,7 @@ cdef class ListValue(ArrayValue):
 
 
 cdef dict _scalar_classes = {
+    Type_BOOL: BooleanValue,
     Type_UINT8: Int8Value,
     Type_UINT16: Int16Value,
     Type_UINT32: Int32Value,

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 25f6969..2beb6b3 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -22,7 +22,10 @@ import pyarrow
 class TestConvertList(unittest.TestCase):
 
     def test_boolean(self):
-        pass
+        arr = pyarrow.from_pylist([True, None, False, None])
+        assert len(arr) == 4
+        assert arr.null_count == 2
+        assert arr.type == pyarrow.bool_()
 
     def test_empty_list(self):
         arr = pyarrow.from_pylist([])

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 021737d..4fb850a 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -16,67 +16,74 @@
 # under the License.
 
 from pyarrow.compat import unittest, u
-import pyarrow as arrow
+import pyarrow as A
 
 
 class TestScalars(unittest.TestCase):
 
     def test_null_singleton(self):
         with self.assertRaises(Exception):
-            arrow.NAType()
+            A.NAType()
 
     def test_bool(self):
-        pass
+        arr = A.from_pylist([True, None, False, None])
+
+        v = arr[0]
+        assert isinstance(v, A.BooleanValue)
+        assert repr(v) == "True"
+        assert v.as_py() == True
+
+        assert arr[1] is A.NA
 
     def test_int64(self):
-        arr = arrow.from_pylist([1, 2, None])
+        arr = A.from_pylist([1, 2, None])
 
         v = arr[0]
-        assert isinstance(v, arrow.Int64Value)
+        assert isinstance(v, A.Int64Value)
         assert repr(v) == "1"
         assert v.as_py() == 1
 
-        assert arr[2] is arrow.NA
+        assert arr[2] is A.NA
 
     def test_double(self):
-        arr = arrow.from_pylist([1.5, None, 3])
+        arr = A.from_pylist([1.5, None, 3])
 
         v = arr[0]
-        assert isinstance(v, arrow.DoubleValue)
+        assert isinstance(v, A.DoubleValue)
         assert repr(v) == "1.5"
         assert v.as_py() == 1.5
 
-        assert arr[1] is arrow.NA
+        assert arr[1] is A.NA
 
         v = arr[2]
         assert v.as_py() == 3.0
 
     def test_string(self):
-        arr = arrow.from_pylist(['foo', None, u('bar')])
+        arr = A.from_pylist(['foo', None, u('bar')])
 
         v = arr[0]
-        assert isinstance(v, arrow.StringValue)
+        assert isinstance(v, A.StringValue)
         assert repr(v) == "'foo'"
         assert v.as_py() == 'foo'
 
-        assert arr[1] is arrow.NA
+        assert arr[1] is A.NA
 
         v = arr[2].as_py()
         assert v == 'bar'
         assert isinstance(v, str)
 
     def test_list(self):
-        arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
+        arr = A.from_pylist([['foo', None], None, ['bar'], []])
 
         v = arr[0]
         assert len(v) == 2
-        assert isinstance(v, arrow.ListValue)
+        assert isinstance(v, A.ListValue)
         assert repr(v) == "['foo', None]"
         assert v.as_py() == ['foo', None]
         assert v[0].as_py() == 'foo'
-        assert v[1] is arrow.NA
+        assert v[1] is A.NA
 
-        assert arr[1] is arrow.NA
+        assert arr[1] is A.NA
 
         v = arr[3]
         assert len(v) == 0

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index acb13ac..78ef1b3 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -61,6 +61,8 @@ class ScalarVisitor {
     ++total_count_;
     if (obj == Py_None) {
       ++none_count_;
+    } else if (PyBool_Check(obj)) {
+      ++bool_count_;
     } else if (PyFloat_Check(obj)) {
       ++float_count_;
     } else if (IsPyInteger(obj)) {
@@ -256,6 +258,20 @@ class TypedConverter : public SeqConverter {
 class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        typed_builder_->AppendNull();
+      } else {
+        if (item.obj() == Py_True) {
+          typed_builder_->Append(true);
+        } else {
+          typed_builder_->Append(false);
+        }
+      }
+    }
     return Status::OK();
   }
 };
@@ -265,14 +281,15 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {
   Status AppendData(PyObject* seq) override {
     int64_t val;
     Py_ssize_t size = PySequence_Size(seq);
+    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
-        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+        typed_builder_->AppendNull();
       } else {
         val = PyLong_AsLongLong(item.obj());
         RETURN_IF_PYERROR();
-        RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+        typed_builder_->Append(val);
       }
     }
     return Status::OK();
@@ -284,14 +301,15 @@ class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
   Status AppendData(PyObject* seq) override {
     double val;
     Py_ssize_t size = PySequence_Size(seq);
+    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
-        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+        typed_builder_->AppendNull();
       } else {
         val = PyFloat_AsDouble(item.obj());
         RETURN_IF_PYERROR();
-        RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+        typed_builder_->Append(val);
       }
     }
     return Status::OK();


Mime
View raw message