arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jacq...@apache.org
Subject [14/17] arrow git commit: ARROW-4: This provides an partial C++11 implementation of the Apache Arrow data structures along with a cmake-based build system. The codebase generally follows Google C++ style guide, but more cleaning to be more conforming is
Date Wed, 17 Feb 2016 12:39:49 GMT
http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
new file mode 100644
index 0000000..47673ff
--- /dev/null
+++ b/cpp/src/arrow/types/list-test.cc
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <cstdlib>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/list.h"
+#include "arrow/types/string.h"
+#include "arrow/types/test-common.h"
+#include "arrow/util/status.h"
+
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+class ArrayBuilder;
+
+TEST(TypesTest, TestListType) {
+  std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
+
+  ListType list_type(vt);
+  ListType list_type_nn(vt, false);
+
+  ASSERT_EQ(list_type.type, TypeEnum::LIST);
+  ASSERT_TRUE(list_type.nullable);
+  ASSERT_FALSE(list_type_nn.nullable);
+
+  ASSERT_EQ(list_type.name(), string("list"));
+  ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
+
+  ASSERT_EQ(list_type.value_type->type, vt->type);
+  ASSERT_EQ(list_type.value_type->type, vt->type);
+
+  std::shared_ptr<DataType> st = std::make_shared<StringType>();
+  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+  ASSERT_EQ(lt->ToString(), string("list<string>"));
+
+  ListType lt2(lt);
+  ASSERT_EQ(lt2.ToString(), string("list<list<string>>"));
+}
+
+// ----------------------------------------------------------------------
+// List tests
+
+class TestListBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    value_type_ = TypePtr(new Int32Type());
+    type_ = TypePtr(new ListType(value_type_));
+
+    ArrayBuilder* tmp;
+    ASSERT_OK(make_builder(type_, &tmp));
+    builder_.reset(static_cast<ListBuilder*>(tmp));
+  }
+
+  void Done() {
+    Array* out;
+    ASSERT_OK(builder_->ToArray(&out));
+    result_.reset(static_cast<ListArray*>(out));
+  }
+
+ protected:
+  TypePtr value_type_;
+  TypePtr type_;
+
+  unique_ptr<ListBuilder> builder_;
+  unique_ptr<ListArray> result_;
+};
+
+
+TEST_F(TestListBuilder, TestResize) {
+}
+
+TEST_F(TestListBuilder, TestAppendNull) {
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_OK(builder_->AppendNull());
+
+  Done();
+
+  ASSERT_TRUE(result_->IsNull(0));
+  ASSERT_TRUE(result_->IsNull(1));
+
+  ASSERT_EQ(0, result_->offsets()[0]);
+  ASSERT_EQ(0, result_->offset(1));
+  ASSERT_EQ(0, result_->offset(2));
+
+  Int32Array* values = static_cast<Int32Array*>(result_->values().get());
+  ASSERT_EQ(0, values->length());
+}
+
+TEST_F(TestListBuilder, TestBasics) {
+  vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+  vector<int> lengths = {3, 0, 4};
+  vector<uint8_t> is_null = {0, 1, 0};
+
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder());
+
+  int pos = 0;
+  for (size_t i = 0; i < lengths.size(); ++i) {
+    ASSERT_OK(builder_->Append(is_null[i] > 0));
+    for (int j = 0; j < lengths[i]; ++j) {
+      ASSERT_OK(vb->Append(values[pos++]));
+    }
+  }
+
+  Done();
+
+  ASSERT_TRUE(result_->nullable());
+  ASSERT_TRUE(result_->values()->nullable());
+
+  ASSERT_EQ(3, result_->length());
+  vector<int32_t> ex_offsets = {0, 3, 3, 7};
+  for (size_t i = 0; i < ex_offsets.size(); ++i) {
+    ASSERT_EQ(ex_offsets[i], result_->offset(i));
+  }
+
+  for (int i = 0; i < result_->length(); ++i) {
+    ASSERT_EQ(static_cast<bool>(is_null[i]), result_->IsNull(i));
+  }
+
+  ASSERT_EQ(7, result_->values()->length());
+  Int32Array* varr = static_cast<Int32Array*>(result_->values().get());
+
+  for (size_t i = 0; i < values.size(); ++i) {
+    ASSERT_EQ(values[i], varr->Value(i));
+  }
+}
+
+TEST_F(TestListBuilder, TestBasicsNonNullable) {
+}
+
+
+TEST_F(TestListBuilder, TestZeroLength) {
+  // All buffers are null
+  Done();
+}
+
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
new file mode 100644
index 0000000..f0ff5bf
--- /dev/null
+++ b/cpp/src/arrow/types/list.cc
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/types/list.h"
+
+#include <sstream>
+#include <string>
+
+namespace arrow {
+
+std::string ListType::ToString() const {
+  std::stringstream s;
+  s << "list<" << value_type->ToString() << ">";
+  return s.str();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
new file mode 100644
index 0000000..0f11162
--- /dev/null
+++ b/cpp/src/arrow/types/list.h
@@ -0,0 +1,206 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_LIST_H
+#define ARROW_TYPES_LIST_H
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/type.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+struct ListType : public DataType {
+  // List can contain any other logical value type
+  TypePtr value_type;
+
+  explicit ListType(const TypePtr& value_type, bool nullable = true)
+      : DataType(TypeEnum::LIST, nullable),
+        value_type(value_type) {}
+
+  static char const *name() {
+    return "list";
+  }
+
+  virtual std::string ToString() const;
+};
+
+
+class ListArray : public Array {
+ public:
+  ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
+
+  ListArray(const TypePtr& type, int64_t length, std::shared_ptr<Buffer> offsets,
+      const ArrayPtr& values, std::shared_ptr<Buffer> nulls = nullptr) {
+    Init(type, length, offsets, values, nulls);
+  }
+
+  virtual ~ListArray() {}
+
+  void Init(const TypePtr& type, int64_t length, std::shared_ptr<Buffer> offsets,
+      const ArrayPtr& values, std::shared_ptr<Buffer> nulls = nullptr) {
+    offset_buf_ = offsets;
+    offsets_ = offsets == nullptr? nullptr :
+      reinterpret_cast<const int32_t*>(offset_buf_->data());
+
+    values_ = values;
+    Array::Init(type, length, nulls);
+  }
+
+  // Return a shared pointer in case the requestor desires to share ownership
+  // with this array.
+  const ArrayPtr& values() const {return values_;}
+
+  const int32_t* offsets() const { return offsets_;}
+
+  int32_t offset(int i) const { return offsets_[i];}
+
+  // Neither of these functions will perform boundschecking
+  int32_t value_offset(int i) { return offsets_[i];}
+  int32_t value_length(int i) { return offsets_[i + 1] - offsets_[i];}
+
+ protected:
+  std::shared_ptr<Buffer> offset_buf_;
+  const int32_t* offsets_;
+  ArrayPtr values_;
+};
+
+// ----------------------------------------------------------------------
+// Array builder
+
+
+// Builder class for variable-length list array value types
+//
+// To use this class, you must append values to the child array builder and use
+// the Append function to delimit each distinct list value (once the values
+// have been appended to the child array)
+class ListBuilder : public Int32Builder {
+ public:
+  ListBuilder(const TypePtr& type, ArrayBuilder* value_builder)
+      : Int32Builder(type) {
+    value_builder_.reset(value_builder);
+  }
+
+  Status Init(int64_t elements) {
+    // One more than requested.
+    //
+    // XXX: This is slightly imprecise, because we might trigger null mask
+    // resizes that are unnecessary when creating arrays with power-of-two size
+    return Int32Builder::Init(elements + 1);
+  }
+
+  Status Resize(int64_t capacity) {
+    // Need space for the end offset
+    RETURN_NOT_OK(Int32Builder::Resize(capacity + 1));
+
+    // Slight hack, as the "real" capacity is one less
+    --capacity_;
+    return Status::OK();
+  }
+
+  // Vector append
+  //
+  // If passed, null_bytes is of equal length to values, and any nonzero byte
+  // will be considered as a null for that slot
+  Status Append(T* values, int64_t length, uint8_t* null_bytes = nullptr) {
+    if (length_ + length > capacity_) {
+      int64_t new_capacity = util::next_power2(length_ + length);
+      RETURN_NOT_OK(Resize(new_capacity));
+    }
+    memcpy(raw_buffer() + length_, values, length * elsize_);
+
+    if (nullable_ && null_bytes != nullptr) {
+      // If null_bytes is all not null, then none of the values are null
+      for (int i = 0; i < length; ++i) {
+        util::set_bit(null_bits_, length_ + i, static_cast<bool>(null_bytes[i]));
+      }
+    }
+
+    length_ += length;
+    return Status::OK();
+  }
+
+  // Initialize an array type instance with the results of this builder
+  // Transfers ownership of all buffers
+  template <typename Container>
+  Status Transfer(Container* out) {
+    Array* child_values;
+    RETURN_NOT_OK(value_builder_->ToArray(&child_values));
+
+    // Add final offset if the length is non-zero
+    if (length_) {
+      raw_buffer()[length_] = child_values->length();
+    }
+
+    out->Init(type_, length_, values_, ArrayPtr(child_values), nulls_);
+    values_ = nulls_ = nullptr;
+    capacity_ = length_ = 0;
+    return Status::OK();
+  }
+
+  virtual Status ToArray(Array** out) {
+    ListArray* result = new ListArray();
+    RETURN_NOT_OK(Transfer(result));
+    *out = static_cast<Array*>(result);
+    return Status::OK();
+  }
+
+  // Start a new variable-length list slot
+  //
+  // This function should be called before beginning to append elements to the
+  // value builder
+  Status Append(bool is_null = false) {
+    if (length_ == capacity_) {
+      // If the capacity was not already a multiple of 2, do so here
+      RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
+    }
+    if (nullable_) {
+      util::set_bit(null_bits_, length_, is_null);
+    }
+
+    raw_buffer()[length_++] = value_builder_->length();
+    return Status::OK();
+  }
+
+  // Status Append(int32_t* offsets, int length, uint8_t* null_bytes) {
+  //   return Int32Builder::Append(offsets, length, null_bytes);
+  // }
+
+  Status AppendNull() {
+    return Append(true);
+  }
+
+  ArrayBuilder* value_builder() const { return value_builder_.get();}
+
+ protected:
+  std::unique_ptr<ArrayBuilder> value_builder_;
+};
+
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_LIST_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/null.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/null.h b/cpp/src/arrow/types/null.h
new file mode 100644
index 0000000..c67f752
--- /dev/null
+++ b/cpp/src/arrow/types/null.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_NULL_H
+#define ARROW_TYPES_NULL_H
+
+#include <string>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+struct NullType : public PrimitiveType<NullType> {
+  PRIMITIVE_DECL(NullType, void, NA, 0, "null");
+};
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_NULL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
new file mode 100644
index 0000000..1296860
--- /dev/null
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -0,0 +1,345 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/boolean.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/floating.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/test-common.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+TEST(TypesTest, TestBytesType) {
+  BytesType t1(3);
+
+  ASSERT_EQ(t1.type, LayoutEnum::BYTE);
+  ASSERT_EQ(t1.size, 3);
+}
+
+
+#define PRIMITIVE_TEST(KLASS, ENUM, NAME)       \
+  TEST(TypesTest, TestPrimitive_##ENUM) {       \
+    KLASS tp;                                   \
+    KLASS tp_nn(false);                         \
+                                                \
+    ASSERT_EQ(tp.type, TypeEnum::ENUM);         \
+    ASSERT_EQ(tp.name(), string(NAME));         \
+    ASSERT_TRUE(tp.nullable);                   \
+    ASSERT_FALSE(tp_nn.nullable);               \
+                                                \
+    KLASS tp_copy = tp_nn;                      \
+    ASSERT_FALSE(tp_copy.nullable);             \
+  }
+
+PRIMITIVE_TEST(Int8Type, INT8, "int8");
+PRIMITIVE_TEST(Int16Type, INT16, "int16");
+PRIMITIVE_TEST(Int32Type, INT32, "int32");
+PRIMITIVE_TEST(Int64Type, INT64, "int64");
+PRIMITIVE_TEST(UInt8Type, UINT8, "uint8");
+PRIMITIVE_TEST(UInt16Type, UINT16, "uint16");
+PRIMITIVE_TEST(UInt32Type, UINT32, "uint32");
+PRIMITIVE_TEST(UInt64Type, UINT64, "uint64");
+
+PRIMITIVE_TEST(FloatType, FLOAT, "float");
+PRIMITIVE_TEST(DoubleType, DOUBLE, "double");
+
+PRIMITIVE_TEST(BooleanType, BOOL, "bool");
+
+// ----------------------------------------------------------------------
+// Primitive type tests
+
+TEST_F(TestBuilder, TestResize) {
+  builder_->Init(10);
+  ASSERT_EQ(2, builder_->nulls()->size());
+
+  builder_->Resize(30);
+  ASSERT_EQ(4, builder_->nulls()->size());
+}
+
+template <typename Attrs>
+class TestPrimitiveBuilder : public TestBuilder {
+ public:
+  typedef typename Attrs::ArrayType ArrayType;
+  typedef typename Attrs::BuilderType BuilderType;
+  typedef typename Attrs::T T;
+
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    type_ = Attrs::type();
+    type_nn_ = Attrs::type(false);
+
+    ArrayBuilder* tmp;
+    ASSERT_OK(make_builder(type_, &tmp));
+    builder_.reset(static_cast<BuilderType*>(tmp));
+
+    ASSERT_OK(make_builder(type_nn_, &tmp));
+    builder_nn_.reset(static_cast<BuilderType*>(tmp));
+  }
+
+  void RandomData(int64_t N, double pct_null = 0.1) {
+    Attrs::draw(N, &draws_);
+    random_nulls(N, pct_null, &nulls_);
+  }
+
+  void CheckNullable() {
+    ArrayType result;
+    ArrayType expected;
+    int64_t size = builder_->length();
+
+    auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
+        size * sizeof(T));
+
+    auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size);
+
+    expected.Init(size, ex_data, ex_nulls);
+    ASSERT_OK(builder_->Transfer(&result));
+
+    // Builder is now reset
+    ASSERT_EQ(0, builder_->length());
+    ASSERT_EQ(0, builder_->capacity());
+    ASSERT_EQ(nullptr, builder_->buffer());
+
+    ASSERT_TRUE(result.Equals(expected));
+  }
+
+  void CheckNonNullable() {
+    ArrayType result;
+    ArrayType expected;
+    int64_t size = builder_nn_->length();
+
+    auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
+        size * sizeof(T));
+
+    expected.Init(size, ex_data);
+    ASSERT_OK(builder_nn_->Transfer(&result));
+
+    // Builder is now reset
+    ASSERT_EQ(0, builder_nn_->length());
+    ASSERT_EQ(0, builder_nn_->capacity());
+    ASSERT_EQ(nullptr, builder_nn_->buffer());
+
+    ASSERT_TRUE(result.Equals(expected));
+  }
+
+ protected:
+  TypePtr type_;
+  TypePtr type_nn_;
+  unique_ptr<BuilderType> builder_;
+  unique_ptr<BuilderType> builder_nn_;
+
+  vector<T> draws_;
+  vector<uint8_t> nulls_;
+};
+
+#define PTYPE_DECL(CapType, c_type)             \
+  typedef CapType##Array ArrayType;             \
+  typedef CapType##Builder BuilderType;         \
+  typedef CapType##Type Type;                   \
+  typedef c_type T;                             \
+                                                \
+  static TypePtr type(bool nullable = true) {   \
+    return TypePtr(new Type(nullable));         \
+  }
+
+#define PINT_DECL(CapType, c_type, LOWER, UPPER)    \
+  struct P##CapType {                               \
+    PTYPE_DECL(CapType, c_type);                    \
+    static void draw(int64_t N, vector<T>* draws) {  \
+      randint<T>(N, LOWER, UPPER, draws);           \
+    }                                               \
+  }
+
+PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
+PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
+PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
+PINT_DECL(UInt64, uint64_t, 0, UINT64_MAX);
+
+PINT_DECL(Int8, int8_t, INT8_MIN, INT8_MAX);
+PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
+PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
+PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
+
+typedef ::testing::Types<PUInt8, PUInt16, PUInt32, PUInt64,
+                         PInt8, PInt16, PInt32, PInt64> Primitives;
+
+TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
+
+#define DECL_T()                                \
+  typedef typename TestFixture::T T;
+
+#define DECL_ARRAYTYPE()                                \
+  typedef typename TestFixture::ArrayType ArrayType;
+
+
+TYPED_TEST(TestPrimitiveBuilder, TestInit) {
+  DECL_T();
+
+  int64_t n = 1000;
+  ASSERT_OK(this->builder_->Init(n));
+  ASSERT_EQ(n, this->builder_->capacity());
+  ASSERT_EQ(n * sizeof(T), this->builder_->buffer()->size());
+
+  // unsure if this should go in all builder classes
+  ASSERT_EQ(0, this->builder_->num_children());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
+  int size = 10000;
+  for (int i = 0; i < size; ++i) {
+    ASSERT_OK(this->builder_->AppendNull());
+  }
+
+  Array* result;
+  ASSERT_OK(this->builder_->ToArray(&result));
+  unique_ptr<Array> holder(result);
+
+  for (int i = 0; i < size; ++i) {
+    ASSERT_TRUE(result->IsNull(i));
+  }
+}
+
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
+  DECL_T();
+
+  int size = 10000;
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& nulls = this->nulls_;
+
+  this->RandomData(size);
+
+  int i;
+  // Append the first 1000
+  for (i = 0; i < 1000; ++i) {
+    ASSERT_OK(this->builder_->Append(draws[i], nulls[i] > 0));
+    ASSERT_OK(this->builder_nn_->Append(draws[i]));
+  }
+
+  ASSERT_EQ(1000, this->builder_->length());
+  ASSERT_EQ(1024, this->builder_->capacity());
+
+  ASSERT_EQ(1000, this->builder_nn_->length());
+  ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+  // Append the next 9000
+  for (i = 1000; i < size; ++i) {
+    ASSERT_OK(this->builder_->Append(draws[i], nulls[i] > 0));
+    ASSERT_OK(this->builder_nn_->Append(draws[i]));
+  }
+
+  ASSERT_EQ(size, this->builder_->length());
+  ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
+
+  ASSERT_EQ(size, this->builder_nn_->length());
+  ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
+
+  this->CheckNullable();
+  this->CheckNonNullable();
+}
+
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
+  DECL_T();
+
+  int size = 10000;
+  this->RandomData(size);
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& nulls = this->nulls_;
+
+  // first slug
+  int K = 1000;
+
+  ASSERT_OK(this->builder_->Append(draws.data(), K, nulls.data()));
+  ASSERT_OK(this->builder_nn_->Append(draws.data(), K));
+
+  ASSERT_EQ(1000, this->builder_->length());
+  ASSERT_EQ(1024, this->builder_->capacity());
+
+  ASSERT_EQ(1000, this->builder_nn_->length());
+  ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+  // Append the next 9000
+  ASSERT_OK(this->builder_->Append(draws.data() + K, size - K, nulls.data() + K));
+  ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));
+
+  ASSERT_EQ(size, this->builder_->length());
+  ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
+
+  this->CheckNullable();
+  this->CheckNonNullable();
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
+  int n = 1000;
+  ASSERT_OK(this->builder_->Init(n));
+
+  ASSERT_OK(this->builder_->Advance(100));
+  ASSERT_EQ(100, this->builder_->length());
+
+  ASSERT_OK(this->builder_->Advance(900));
+  ASSERT_RAISES(Invalid, this->builder_->Advance(1));
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestResize) {
+  DECL_T();
+
+  int cap = MIN_BUILDER_CAPACITY * 2;
+
+  ASSERT_OK(this->builder_->Resize(cap));
+  ASSERT_EQ(cap, this->builder_->capacity());
+
+  ASSERT_EQ(cap * sizeof(T), this->builder_->buffer()->size());
+  ASSERT_EQ(util::ceil_byte(cap) / 8, this->builder_->nulls()->size());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
+  int n = 100;
+  ASSERT_OK(this->builder_->Reserve(n));
+  ASSERT_EQ(0, this->builder_->length());
+  ASSERT_EQ(MIN_BUILDER_CAPACITY, this->builder_->capacity());
+
+  ASSERT_OK(this->builder_->Advance(100));
+  ASSERT_OK(this->builder_->Reserve(MIN_BUILDER_CAPACITY));
+
+  ASSERT_EQ(util::next_power2(MIN_BUILDER_CAPACITY + 100),
+      this->builder_->capacity());
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
new file mode 100644
index 0000000..2612e8c
--- /dev/null
+++ b/cpp/src/arrow/types/primitive.cc
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/types/primitive.h"
+
+#include <memory>
+
+#include "arrow/util/buffer.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Primitive array base
+
+void PrimitiveArray::Init(const TypePtr& type, int64_t length,
+    const std::shared_ptr<Buffer>& data,
+    const std::shared_ptr<Buffer>& nulls) {
+  Array::Init(type, length, nulls);
+  data_ = data;
+  raw_data_ = data == nullptr? nullptr : data_->data();
+}
+
+bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
+  if (this == &other) return true;
+  if (type_->nullable != other.type_->nullable) return false;
+
+  bool equal_data = data_->Equals(*other.data_, length_);
+  if (type_->nullable) {
+    return equal_data &&
+      nulls_->Equals(*other.nulls_, util::ceil_byte(length_) / 8);
+  } else {
+    return equal_data;
+  }
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
new file mode 100644
index 0000000..a419112
--- /dev/null
+++ b/cpp/src/arrow/types/primitive.h
@@ -0,0 +1,240 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_PRIMITIVE_H
+#define ARROW_TYPES_PRIMITIVE_H
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/type.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+template <typename Derived>
+struct PrimitiveType : public DataType {
+  explicit PrimitiveType(bool nullable = true)
+      : DataType(Derived::type_enum, nullable) {}
+
+  virtual std::string ToString() const {
+    return std::string(static_cast<const Derived*>(this)->name());
+  }
+};
+
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)          \
+  typedef C_TYPE c_type;                                            \
+  static constexpr TypeEnum type_enum = TypeEnum::ENUM;             \
+  static constexpr int size = SIZE;                              \
+                                                                    \
+  explicit TYPENAME(bool nullable = true)                           \
+      : PrimitiveType<TYPENAME>(nullable) {}                        \
+                                                                    \
+  static const char* name() {                                       \
+    return NAME;                                                    \
+  }
+
+
+// Base class for fixed-size logical types
+class PrimitiveArray : public Array {
+ public:
+  PrimitiveArray() : Array(), data_(nullptr), raw_data_(nullptr) {}
+
+  virtual ~PrimitiveArray() {}
+
+  void Init(const TypePtr& type, int64_t length, const std::shared_ptr<Buffer>& data,
+      const std::shared_ptr<Buffer>& nulls = nullptr);
+
+  const std::shared_ptr<Buffer>& data() const { return data_;}
+
+  bool Equals(const PrimitiveArray& other) const;
+
+ protected:
+  std::shared_ptr<Buffer> data_;
+  const uint8_t* raw_data_;
+};
+
+
+template <typename TypeClass>
+class PrimitiveArrayImpl : public PrimitiveArray {
+ public:
+  typedef typename TypeClass::c_type T;
+
+  PrimitiveArrayImpl() : PrimitiveArray() {}
+
+  PrimitiveArrayImpl(int64_t length, const std::shared_ptr<Buffer>& data,
+      const std::shared_ptr<Buffer>& nulls = nullptr) {
+    Init(length, data, nulls);
+  }
+
+  void Init(int64_t length, const std::shared_ptr<Buffer>& data,
+      const std::shared_ptr<Buffer>& nulls = nullptr) {
+    TypePtr type(new TypeClass(nulls != nullptr));
+    PrimitiveArray::Init(type, length, data, nulls);
+  }
+
+  bool Equals(const PrimitiveArrayImpl& other) const {
+    return PrimitiveArray::Equals(*static_cast<const PrimitiveArray*>(&other));
+  }
+
+  const T* raw_data() const { return reinterpret_cast<const T*>(raw_data_);}
+
+  T Value(int64_t i) const {
+    return raw_data()[i];
+  }
+
+  TypeClass* exact_type() const {
+    return static_cast<TypeClass*>(type_);
+  }
+};
+
+
+template <typename Type, typename ArrayType>
+class PrimitiveBuilder : public ArrayBuilder {
+ public:
+  typedef typename Type::c_type T;
+
+  explicit PrimitiveBuilder(const TypePtr& type)
+      : ArrayBuilder(type), values_(nullptr) {
+    elsize_ = sizeof(T);
+  }
+
+  virtual ~PrimitiveBuilder() {}
+
+  Status Resize(int64_t capacity) {
+    // XXX: Set floor size for now
+    if (capacity < MIN_BUILDER_CAPACITY) {
+      capacity = MIN_BUILDER_CAPACITY;
+    }
+
+    if (capacity_ == 0) {
+      RETURN_NOT_OK(Init(capacity));
+    } else {
+      RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
+      RETURN_NOT_OK(values_->Resize(capacity * elsize_));
+      capacity_ = capacity;
+    }
+    return Status::OK();
+  }
+
+  Status Init(int64_t capacity) {
+    RETURN_NOT_OK(ArrayBuilder::Init(capacity));
+
+    values_ = std::make_shared<OwnedMutableBuffer>();
+    return values_->Resize(capacity * elsize_);
+  }
+
+  Status Reserve(int64_t elements) {
+    if (length_ + elements > capacity_) {
+      int64_t new_capacity = util::next_power2(length_ + elements);
+      return Resize(new_capacity);
+    }
+    return Status::OK();
+  }
+
+  Status Advance(int64_t elements) {
+    return ArrayBuilder::Advance(elements);
+  }
+
+  // Scalar append
+  Status Append(T val, bool is_null = false) {
+    if (length_ == capacity_) {
+      // If the capacity was not already a multiple of 2, do so here
+      RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
+    }
+    if (nullable_) {
+      util::set_bit(null_bits_, length_, is_null);
+    }
+    raw_buffer()[length_++] = val;
+    return Status::OK();
+  }
+
+  // Vector append
+  //
+  // If passed, null_bytes is of equal length to values, and any nonzero byte
+  // will be considered as a null for that slot
+  Status Append(const T* values, int64_t length, uint8_t* null_bytes = nullptr) {
+    if (length_ + length > capacity_) {
+      int64_t new_capacity = util::next_power2(length_ + length);
+      RETURN_NOT_OK(Resize(new_capacity));
+    }
+    memcpy(raw_buffer() + length_, values, length * elsize_);
+
+    if (nullable_ && null_bytes != nullptr) {
+      // If null_bytes is all not null, then none of the values are null
+      for (int64_t i = 0; i < length; ++i) {
+        util::set_bit(null_bits_, length_ + i, static_cast<bool>(null_bytes[i]));
+      }
+    }
+
+    length_ += length;
+    return Status::OK();
+  }
+
+  Status AppendNull() {
+    if (!nullable_) {
+      return Status::Invalid("not nullable");
+    }
+    if (length_ == capacity_) {
+      // If the capacity was not already a multiple of 2, do so here
+      RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
+    }
+    util::set_bit(null_bits_, length_++, true);
+    return Status::OK();
+  }
+
+  // Initialize an array type instance with the results of this builder
+  // Transfers ownership of all buffers
+  Status Transfer(PrimitiveArray* out) {
+    out->Init(type_, length_, values_, nulls_);
+    values_ = nulls_ = nullptr;
+    capacity_ = length_ = 0;
+    return Status::OK();
+  }
+
+  Status Transfer(ArrayType* out) {
+    return Transfer(static_cast<PrimitiveArray*>(out));
+  }
+
+  virtual Status ToArray(Array** out) {
+    ArrayType* result = new ArrayType();
+    RETURN_NOT_OK(Transfer(result));
+    *out = static_cast<Array*>(result);
+    return Status::OK();
+  }
+
+  T* raw_buffer() {
+    return reinterpret_cast<T*>(values_->mutable_data());
+  }
+
+  std::shared_ptr<Buffer> buffer() const {
+    return values_;
+  }
+
+ protected:
+  std::shared_ptr<OwnedMutableBuffer> values_;
+  int64_t elsize_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_TYPES_PRIMITIVE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
new file mode 100644
index 0000000..6dba3fd
--- /dev/null
+++ b/cpp/src/arrow/types/string-test.cc
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/string.h"
+#include "arrow/types/test-common.h"
+#include "arrow/util/status.h"
+
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+
+TEST(TypesTest, TestCharType) {
+  CharType t1(5);
+
+  ASSERT_EQ(t1.type, TypeEnum::CHAR);
+  ASSERT_TRUE(t1.nullable);
+  ASSERT_EQ(t1.size, 5);
+
+  ASSERT_EQ(t1.ToString(), string("char(5)"));
+
+  // Test copy constructor
+  CharType t2 = t1;
+  ASSERT_EQ(t2.type, TypeEnum::CHAR);
+  ASSERT_TRUE(t2.nullable);
+  ASSERT_EQ(t2.size, 5);
+}
+
+
+TEST(TypesTest, TestVarcharType) {
+  VarcharType t1(5);
+
+  ASSERT_EQ(t1.type, TypeEnum::VARCHAR);
+  ASSERT_TRUE(t1.nullable);
+  ASSERT_EQ(t1.size, 5);
+  ASSERT_EQ(t1.physical_type.size, 6);
+
+  ASSERT_EQ(t1.ToString(), string("varchar(5)"));
+
+  // Test copy constructor
+  VarcharType t2 = t1;
+  ASSERT_EQ(t2.type, TypeEnum::VARCHAR);
+  ASSERT_TRUE(t2.nullable);
+  ASSERT_EQ(t2.size, 5);
+  ASSERT_EQ(t2.physical_type.size, 6);
+}
+
+TEST(TypesTest, TestStringType) {
+  StringType str;
+  StringType str_nn(false);
+
+  ASSERT_EQ(str.type, TypeEnum::STRING);
+  ASSERT_EQ(str.name(), string("string"));
+  ASSERT_TRUE(str.nullable);
+  ASSERT_FALSE(str_nn.nullable);
+}
+
+// ----------------------------------------------------------------------
+// String container
+
+class TestStringContainer : public ::testing::Test  {
+ public:
+  void SetUp() {
+    chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
+    offsets_ = {0, 1, 1, 1, 3, 6};
+    nulls_ = {0, 0, 1, 0, 0};
+    expected_ = {"a", "", "", "bb", "ccc"};
+
+    MakeArray();
+  }
+
+  void MakeArray() {
+    length_ = offsets_.size() - 1;
+    int64_t nchars = chars_.size();
+
+    value_buf_ = to_buffer(chars_);
+    values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
+
+    offsets_buf_ = to_buffer(offsets_);
+
+    nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size());
+    strings_.Init(length_, offsets_buf_, values_, nulls_buf_);
+  }
+
+ protected:
+  vector<int32_t> offsets_;
+  vector<char> chars_;
+  vector<uint8_t> nulls_;
+
+  vector<string> expected_;
+
+  std::shared_ptr<Buffer> value_buf_;
+  std::shared_ptr<Buffer> offsets_buf_;
+  std::shared_ptr<Buffer> nulls_buf_;
+
+  int64_t length_;
+
+  ArrayPtr values_;
+  StringArray strings_;
+};
+
+
+TEST_F(TestStringContainer, TestArrayBasics) {
+  ASSERT_EQ(length_, strings_.length());
+  ASSERT_TRUE(strings_.nullable());
+}
+
+TEST_F(TestStringContainer, TestType) {
+  TypePtr type = strings_.type();
+
+  ASSERT_EQ(TypeEnum::STRING, type->type);
+  ASSERT_EQ(TypeEnum::STRING, strings_.type_enum());
+}
+
+
+TEST_F(TestStringContainer, TestListFunctions) {
+  int pos = 0;
+  for (size_t i = 0; i < expected_.size(); ++i) {
+    ASSERT_EQ(pos, strings_.value_offset(i));
+    ASSERT_EQ(expected_[i].size(), strings_.value_length(i));
+    pos += expected_[i].size();
+  }
+}
+
+
+TEST_F(TestStringContainer, TestDestructor) {
+  auto arr = std::make_shared<StringArray>(length_, offsets_buf_, values_, nulls_buf_);
+}
+
+TEST_F(TestStringContainer, TestGetString) {
+  for (size_t i = 0; i < expected_.size(); ++i) {
+    if (nulls_[i]) {
+      ASSERT_TRUE(strings_.IsNull(i));
+    } else {
+      ASSERT_EQ(expected_[i], strings_.GetString(i));
+    }
+  }
+}
+
+// ----------------------------------------------------------------------
+// String builder tests
+
+class TestStringBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+    type_ = TypePtr(new StringType());
+
+    ArrayBuilder* tmp;
+    ASSERT_OK(make_builder(type_, &tmp));
+    builder_.reset(static_cast<StringBuilder*>(tmp));
+  }
+
+  void Done() {
+    Array* out;
+    ASSERT_OK(builder_->ToArray(&out));
+    result_.reset(static_cast<StringArray*>(out));
+  }
+
+ protected:
+  TypePtr type_;
+
+  unique_ptr<StringBuilder> builder_;
+  unique_ptr<StringArray> result_;
+};
+
+TEST_F(TestStringBuilder, TestAttrs) {
+  ASSERT_FALSE(builder_->value_builder()->nullable());
+}
+
+TEST_F(TestStringBuilder, TestScalarAppend) {
+  vector<string> strings = {"a", "bb", "", "", "ccc"};
+  vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = strings.size();
+  int reps = 1000;
+
+  for (int j = 0; j < reps; ++j) {
+    for (int i = 0; i < N; ++i) {
+      if (is_null[i]) {
+        builder_->AppendNull();
+      } else {
+        builder_->Append(strings[i]);
+      }
+    }
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps * 6, result_->values()->length());
+
+  int64_t length;
+  int64_t pos = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->offset(i));
+      ASSERT_EQ(strings[i % N].size(), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    }
+  }
+}
+
+TEST_F(TestStringBuilder, TestZeroLength) {
+  // All buffers are null
+  Done();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/string.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc
new file mode 100644
index 0000000..f3dfbdc
--- /dev/null
+++ b/cpp/src/arrow/types/string.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/types/string.h"
+
+#include <sstream>
+#include <string>
+
+namespace arrow {
+
+std::string CharType::ToString() const {
+  std::stringstream s;
+  s << "char(" << size << ")";
+  return s.str();
+}
+
+
+std::string VarcharType::ToString() const {
+  std::stringstream s;
+  s << "varchar(" << size << ")";
+  return s.str();
+}
+
+TypePtr StringBuilder::value_type_ = TypePtr(new UInt8Type(false));
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
new file mode 100644
index 0000000..30d6e24
--- /dev/null
+++ b/cpp/src/arrow/types/string.h
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_STRING_H
+#define ARROW_TYPES_STRING_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/type.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/list.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+class ArrayBuilder;
+
+struct CharType : public DataType {
+  int size;
+
+  BytesType physical_type;
+
+  explicit CharType(int size, bool nullable = true)
+      : DataType(TypeEnum::CHAR, nullable),
+        size(size),
+        physical_type(BytesType(size)) {}
+
+  CharType(const CharType& other)
+      : CharType(other.size, other.nullable) {}
+
+  virtual std::string ToString() const;
+};
+
+
+// Variable-length, null-terminated strings, up to a certain length
+struct VarcharType : public DataType {
+  int size;
+
+  BytesType physical_type;
+
+  explicit VarcharType(int size, bool nullable = true)
+      : DataType(TypeEnum::VARCHAR, nullable),
+        size(size),
+        physical_type(BytesType(size + 1)) {}
+  VarcharType(const VarcharType& other)
+      : VarcharType(other.size, other.nullable) {}
+
+  virtual std::string ToString() const;
+};
+
+static const LayoutPtr byte1(new BytesType(1));
+static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
+
+// String is a logical type consisting of a physical list of 1-byte values
+struct StringType : public DataType {
+  explicit StringType(bool nullable = true)
+      : DataType(TypeEnum::STRING, nullable) {}
+
+  StringType(const StringType& other)
+      : StringType(other.nullable) {}
+
+  const LayoutPtr& physical_type() {
+    return physical_string;
+  }
+
+  static char const *name() {
+    return "string";
+  }
+
+  virtual std::string ToString() const {
+    return name();
+  }
+};
+
+
+// TODO: add a BinaryArray layer in between
+class StringArray : public ListArray {
+ public:
+  StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {}
+
+  StringArray(int64_t length, const std::shared_ptr<Buffer>& offsets,
+      const ArrayPtr& values,
+      const std::shared_ptr<Buffer>& nulls = nullptr) {
+    Init(length, offsets, values, nulls);
+  }
+
+  void Init(const TypePtr& type, int64_t length,
+      const std::shared_ptr<Buffer>& offsets,
+      const ArrayPtr& values,
+      const std::shared_ptr<Buffer>& nulls = nullptr) {
+    ListArray::Init(type, length, offsets, values, nulls);
+
+    // TODO: type validation for values array
+
+    // For convenience
+    bytes_ = static_cast<UInt8Array*>(values.get());
+    raw_bytes_ = bytes_->raw_data();
+  }
+
+  void Init(int64_t length, const std::shared_ptr<Buffer>& offsets,
+      const ArrayPtr& values,
+      const std::shared_ptr<Buffer>& nulls = nullptr) {
+    TypePtr type(new StringType(nulls != nullptr));
+    Init(type, length, offsets, values, nulls);
+  }
+
+  // Compute the pointer t
+  const uint8_t* GetValue(int64_t i, int64_t* out_length) const {
+    int32_t pos = offsets_[i];
+    *out_length = offsets_[i + 1] - pos;
+    return raw_bytes_ + pos;
+  }
+
+  // Construct a std::string
+  std::string GetString(int64_t i) const {
+    int64_t nchars;
+    const uint8_t* str = GetValue(i, &nchars);
+    return std::string(reinterpret_cast<const char*>(str), nchars);
+  }
+
+ private:
+  UInt8Array* bytes_;
+  const uint8_t* raw_bytes_;
+};
+
+// Array builder
+
+
+
+class StringBuilder : public ListBuilder {
+ public:
+  explicit StringBuilder(const TypePtr& type) :
+      ListBuilder(type, static_cast<ArrayBuilder*>(new UInt8Builder(value_type_))) {
+    byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
+  }
+
+  Status Append(const std::string& value) {
+    RETURN_NOT_OK(ListBuilder::Append());
+    return byte_builder_->Append(reinterpret_cast<const uint8_t*>(value.c_str()),
+        value.size());
+  }
+
+  Status Append(const uint8_t* value, int64_t length);
+  Status Append(const std::vector<std::string>& values,
+                uint8_t* null_bytes);
+
+  virtual Status ToArray(Array** out) {
+    StringArray* result = new StringArray();
+    RETURN_NOT_OK(ListBuilder::Transfer(result));
+    *out = static_cast<Array*>(result);
+    return Status::OK();
+  }
+
+ protected:
+  UInt8Builder* byte_builder_;
+
+  static TypePtr value_type_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_STRING_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
new file mode 100644
index 0000000..644b545
--- /dev/null
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+#include "arrow/field.h"
+#include "arrow/type.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/string.h"
+#include "arrow/types/struct.h"
+
+using std::string;
+using std::vector;
+
+namespace arrow {
+
+TEST(TestStructType, Basics) {
+  TypePtr f0_type = TypePtr(new Int32Type());
+  Field f0("f0", f0_type);
+
+  TypePtr f1_type = TypePtr(new StringType());
+  Field f1("f1", f1_type);
+
+  TypePtr f2_type = TypePtr(new UInt8Type());
+  Field f2("f2", f2_type);
+
+  vector<Field> fields = {f0, f1, f2};
+
+  StructType struct_type(fields, true);
+  StructType struct_type_nn(fields, false);
+
+  ASSERT_TRUE(struct_type.nullable);
+  ASSERT_FALSE(struct_type_nn.nullable);
+
+  ASSERT_TRUE(struct_type.field(0).Equals(f0));
+  ASSERT_TRUE(struct_type.field(1).Equals(f1));
+  ASSERT_TRUE(struct_type.field(2).Equals(f2));
+
+  ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
+
+  // TODO: out of bounds for field(...)
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
new file mode 100644
index 0000000..b7be5d8
--- /dev/null
+++ b/cpp/src/arrow/types/struct.cc
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/types/struct.h"
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+namespace arrow {
+
+std::string StructType::ToString() const {
+  std::stringstream s;
+  s << "struct<";
+  for (size_t i = 0; i < fields_.size(); ++i) {
+    if (i > 0) s << ", ";
+    const Field& field  = fields_[i];
+    s << field.name << ": " << field.type->ToString();
+  }
+  s << ">";
+  return s.str();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
new file mode 100644
index 0000000..7d8885b
--- /dev/null
+++ b/cpp/src/arrow/types/struct.h
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_STRUCT_H
+#define ARROW_TYPES_STRUCT_H
+
+#include <string>
+#include <vector>
+
+#include "arrow/field.h"
+#include "arrow/type.h"
+
+namespace arrow {
+
+struct StructType : public DataType {
+  std::vector<Field> fields_;
+
+  StructType(const std::vector<Field>& fields,
+      bool nullable = true)
+      : DataType(TypeEnum::STRUCT, nullable) {
+    fields_ = fields;
+  }
+
+  const Field& field(int i) const {
+    return fields_[i];
+  }
+
+  int num_children() const {
+    return fields_.size();
+  }
+
+  virtual std::string ToString() const;
+};
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_STRUCT_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
new file mode 100644
index 0000000..267e48a
--- /dev/null
+++ b/cpp/src/arrow/types/test-common.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_TEST_COMMON_H
+#define ARROW_TYPES_TEST_COMMON_H
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+using std::unique_ptr;
+
+namespace arrow {
+
+class TestBuilder : public ::testing::Test {
+ public:
+  void SetUp() {
+    type_ = TypePtr(new UInt8Type());
+    type_nn_ = TypePtr(new UInt8Type(false));
+    builder_.reset(new UInt8Builder(type_));
+    builder_nn_.reset(new UInt8Builder(type_nn_));
+  }
+ protected:
+  TypePtr type_;
+  TypePtr type_nn_;
+  unique_ptr<ArrayBuilder> builder_;
+  unique_ptr<ArrayBuilder> builder_nn_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_TEST_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/union.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc
new file mode 100644
index 0000000..54f41a7
--- /dev/null
+++ b/cpp/src/arrow/types/union.cc
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/types/union.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+static inline std::string format_union(const std::vector<TypePtr>& child_types) {
+  std::stringstream s;
+  s << "union<";
+  for (size_t i = 0; i < child_types.size(); ++i) {
+    if (i) s << ", ";
+    s << child_types[i]->ToString();
+  }
+  s << ">";
+  return s.str();
+}
+
+std::string DenseUnionType::ToString() const {
+  return format_union(child_types_);
+}
+
+
+std::string SparseUnionType::ToString() const {
+  return format_union(child_types_);
+}
+
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
new file mode 100644
index 0000000..7b66c3b
--- /dev/null
+++ b/cpp/src/arrow/types/union.h
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPES_UNION_H
+#define ARROW_TYPES_UNION_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/type.h"
+#include "arrow/types/collection.h"
+
+namespace arrow {
+
+class Buffer;
+
+struct DenseUnionType : public CollectionType<TypeEnum::DENSE_UNION> {
+  typedef CollectionType<TypeEnum::DENSE_UNION> Base;
+
+  DenseUnionType(const std::vector<TypePtr>& child_types,
+      bool nullable = true)
+      : Base(nullable) {
+    child_types_ = child_types;
+  }
+
+  virtual std::string ToString() const;
+};
+
+
+struct SparseUnionType : public CollectionType<TypeEnum::SPARSE_UNION> {
+  typedef CollectionType<TypeEnum::SPARSE_UNION> Base;
+
+  SparseUnionType(const std::vector<TypePtr>& child_types,
+      bool nullable = true)
+      : Base(nullable) {
+    child_types_ = child_types;
+  }
+
+  virtual std::string ToString() const;
+};
+
+
+class UnionArray : public Array {
+ public:
+  UnionArray() : Array() {}
+
+ protected:
+  // The data are types encoded as int16
+  Buffer* types_;
+  std::vector<std::shared_ptr<Array> > children_;
+};
+
+
+class DenseUnionArray : public UnionArray {
+ public:
+  DenseUnionArray() : UnionArray() {}
+
+ protected:
+  Buffer* offset_buf_;
+};
+
+
+class SparseUnionArray : public UnionArray {
+ public:
+  SparseUnionArray() : UnionArray() {}
+};
+
+} // namespace arrow
+
+#endif // ARROW_TYPES_UNION_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
new file mode 100644
index 0000000..88e3f7a
--- /dev/null
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#######################################
+# arrow_util
+#######################################
+
+set(UTIL_SRCS
+  bit-util.cc
+  buffer.cc
+  status.cc
+)
+
+set(UTIL_LIBS
+  rt)
+
+add_library(arrow_util STATIC
+  ${UTIL_SRCS}
+)
+target_link_libraries(arrow_util ${UTIL_LIBS})
+SET_TARGET_PROPERTIES(arrow_util PROPERTIES LINKER_LANGUAGE CXX)
+
+# Headers: top level
+install(FILES
+  bit-util.h
+  buffer.h
+  macros.h
+  status.h
+  DESTINATION include/arrow/util)
+
+#######################################
+# arrow_test_util
+#######################################
+
+add_library(arrow_test_util)
+target_link_libraries(arrow_test_util
+  arrow_util)
+
+SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX)
+
+#######################################
+# arrow_test_main
+#######################################
+
+add_library(arrow_test_main
+  test_main.cc)
+
+if (APPLE)
+  target_link_libraries(arrow_test_main
+    gtest
+	arrow_util
+	arrow_test_util
+    dl)
+  set_target_properties(arrow_test_main
+        PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+else()
+  target_link_libraries(arrow_test_main
+    gtest
+	arrow_util
+	arrow_test_util
+    pthread
+    dl
+  )
+endif()
+
+ADD_ARROW_TEST(bit-util-test)
+ADD_ARROW_TEST(buffer-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
new file mode 100644
index 0000000..7506ca5
--- /dev/null
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/util/bit-util.h"
+
+namespace arrow {
+
+TEST(UtilTests, TestNextPower2) {
+  using util::next_power2;
+
+  ASSERT_EQ(8, next_power2(6));
+  ASSERT_EQ(8, next_power2(8));
+
+  ASSERT_EQ(1, next_power2(1));
+  ASSERT_EQ(256, next_power2(131));
+
+  ASSERT_EQ(1024, next_power2(1000));
+
+  ASSERT_EQ(4096, next_power2(4000));
+
+  ASSERT_EQ(65536, next_power2(64000));
+
+  ASSERT_EQ(1LL << 32, next_power2((1LL << 32) - 1));
+  ASSERT_EQ(1LL << 31, next_power2((1LL << 31) - 1));
+  ASSERT_EQ(1LL << 62, next_power2((1LL << 62) - 1));
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
new file mode 100644
index 0000000..d2ddd65
--- /dev/null
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstring>
+
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+void util::bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits) {
+  for (int i = 0; i < length; ++i) {
+    set_bit(bits, i, static_cast<bool>(bytes[i]));
+  }
+}
+
+Status util::bytes_to_bits(uint8_t* bytes, int length,
+    std::shared_ptr<Buffer>* out) {
+  int bit_length = ceil_byte(length) / 8;
+
+  auto buffer = std::make_shared<OwnedMutableBuffer>();
+  RETURN_NOT_OK(buffer->Resize(bit_length));
+  memset(buffer->mutable_data(), 0, bit_length);
+  bytes_to_bits(bytes, length, buffer->mutable_data());
+
+  *out = buffer;
+
+  return Status::OK();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
new file mode 100644
index 0000000..61dffa3
--- /dev/null
+++ b/cpp/src/arrow/util/bit-util.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_BIT_UTIL_H
+#define ARROW_UTIL_BIT_UTIL_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+
+#include "arrow/util/buffer.h"
+
+namespace arrow {
+
+class Status;
+
+namespace util {
+
+static inline int64_t ceil_byte(int64_t size) {
+  return (size + 7) & ~7;
+}
+
+static inline int64_t ceil_2bytes(int64_t size) {
+  return (size + 15) & ~15;
+}
+
+static inline bool get_bit(const uint8_t* bits, int i) {
+  return bits[i / 8] & (1 << (i % 8));
+}
+
+static inline void set_bit(uint8_t* bits, int i, bool is_set) {
+  bits[i / 8] |= (1 << (i % 8)) * is_set;
+}
+
+static inline int64_t next_power2(int64_t n) {
+  n--;
+  n |= n >> 1;
+  n |= n >> 2;
+  n |= n >> 4;
+  n |= n >> 8;
+  n |= n >> 16;
+  n |= n >> 32;
+  n++;
+  return n;
+}
+
+void bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits);
+Status bytes_to_bits(uint8_t*, int, std::shared_ptr<Buffer>*);
+
+} // namespace util
+
+} // namespace arrow
+
+#endif // ARROW_UTIL_BIT_UTIL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
new file mode 100644
index 0000000..edfd08e
--- /dev/null
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <cstdlib>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "arrow/test-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+using std::string;
+
+namespace arrow {
+
+class TestBuffer : public ::testing::Test {
+};
+
+TEST_F(TestBuffer, Resize) {
+  OwnedMutableBuffer buf;
+
+  ASSERT_EQ(0, buf.size());
+  ASSERT_OK(buf.Resize(100));
+  ASSERT_EQ(100, buf.size());
+  ASSERT_OK(buf.Resize(200));
+  ASSERT_EQ(200, buf.size());
+
+  // Make it smaller, too
+  ASSERT_OK(buf.Resize(50));
+  ASSERT_EQ(50, buf.size());
+}
+
+TEST_F(TestBuffer, ResizeOOM) {
+  // realloc fails, even though there may be no explicit limit
+  OwnedMutableBuffer buf;
+  ASSERT_OK(buf.Resize(100));
+  int64_t to_alloc = std::numeric_limits<int64_t>::max();
+  ASSERT_RAISES(OutOfMemory, buf.Resize(to_alloc));
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
new file mode 100644
index 0000000..2fb34d5
--- /dev/null
+++ b/cpp/src/arrow/util/buffer.cc
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/buffer.h"
+
+#include <cstdint>
+
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset,
+    int64_t size) {
+  data_ = parent->data() + offset;
+  size_ = size;
+  parent_ = parent;
+}
+
+std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
+  return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
+}
+
+OwnedMutableBuffer::OwnedMutableBuffer() :
+    MutableBuffer(nullptr, 0) {}
+
+Status OwnedMutableBuffer::Resize(int64_t new_size) {
+  size_ = new_size;
+  try {
+    buffer_owner_.resize(new_size);
+  } catch (const std::bad_alloc& e) {
+    return Status::OutOfMemory("resize failed");
+  }
+  data_ = buffer_owner_.data();
+  mutable_data_ = buffer_owner_.data();
+
+  return Status::OK();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
new file mode 100644
index 0000000..3e41839
--- /dev/null
+++ b/cpp/src/arrow/util/buffer.h
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_BUFFER_H
+#define ARROW_UTIL_BUFFER_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+class Status;
+
+// ----------------------------------------------------------------------
+// Buffer classes
+
+// Immutable API for a chunk of bytes which may or may not be owned by the
+// class instance
+class Buffer : public std::enable_shared_from_this<Buffer> {
+ public:
+  Buffer(const uint8_t* data, int64_t size) :
+      data_(data),
+      size_(size) {}
+
+  // An offset into data that is owned by another buffer, but we want to be
+  // able to retain a valid pointer to it even after other shared_ptr's to the
+  // parent buffer have been destroyed
+  Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size);
+
+  std::shared_ptr<Buffer> get_shared_ptr() {
+    return shared_from_this();
+  }
+
+  // Return true if both buffers are the same size and contain the same bytes
+  // up to the number of compared bytes
+  bool Equals(const Buffer& other, int64_t nbytes) const {
+    return this == &other ||
+      (size_ >= nbytes && other.size_ >= nbytes &&
+          !memcmp(data_, other.data_, nbytes));
+  }
+
+  bool Equals(const Buffer& other) const {
+    return this == &other ||
+      (size_ == other.size_ && !memcmp(data_, other.data_, size_));
+  }
+
+  const uint8_t* data() const {
+    return data_;
+  }
+
+  int64_t size() const {
+    return size_;
+  }
+
+  // Returns true if this Buffer is referencing memory (possibly) owned by some
+  // other buffer
+  bool is_shared() const {
+    return static_cast<bool>(parent_);
+  }
+
+  const std::shared_ptr<Buffer> parent() const {
+    return parent_;
+  }
+
+ protected:
+  const uint8_t* data_;
+  int64_t size_;
+
+  // nullptr by default, but may be set
+  std::shared_ptr<Buffer> parent_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Buffer);
+};
+
+// A Buffer whose contents can be mutated. May or may not own its data.
+class MutableBuffer : public Buffer {
+ public:
+  MutableBuffer(uint8_t* data, int64_t size) :
+      Buffer(data, size) {
+    mutable_data_ = data;
+  }
+
+  uint8_t* mutable_data() {
+    return mutable_data_;
+  }
+
+  // Get a read-only view of this buffer
+  std::shared_ptr<Buffer> GetImmutableView();
+
+ protected:
+  MutableBuffer() :
+      Buffer(nullptr, 0),
+      mutable_data_(nullptr) {}
+
+  uint8_t* mutable_data_;
+};
+
+// A MutableBuffer whose memory is owned by the class instance. For example,
+// for reading data out of files that you want to deallocate when this class is
+// garbage-collected
+class OwnedMutableBuffer : public MutableBuffer {
+ public:
+  OwnedMutableBuffer();
+  Status Resize(int64_t new_size);
+
+ private:
+  // TODO: aligned allocations
+  std::vector<uint8_t> buffer_owner_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_UTIL_BUFFER_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/macros.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h
new file mode 100644
index 0000000..069e627
--- /dev/null
+++ b/cpp/src/arrow/util/macros.h
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_MACROS_H
+#define ARROW_UTIL_MACROS_H
+
+// From Google gutil
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
+  TypeName(const TypeName&) = delete;           \
+  void operator=(const TypeName&) = delete
+
+#endif // ARROW_UTIL_MACROS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/random.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/random.h b/cpp/src/arrow/util/random.h
new file mode 100644
index 0000000..64c197e
--- /dev/null
+++ b/cpp/src/arrow/util/random.h
@@ -0,0 +1,128 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// Moved from Kudu http://github.com/cloudera/kudu
+
+#ifndef ARROW_UTIL_RANDOM_H_
+#define ARROW_UTIL_RANDOM_H_
+
+#include <stdint.h>
+
+#include <cmath>
+
+namespace arrow {
+
+namespace random_internal {
+
+static const uint32_t M = 2147483647L;   // 2^31-1
+const double kTwoPi = 6.283185307179586476925286;
+
+} // namespace random_internal
+
+// A very simple random number generator.  Not especially good at
+// generating truly random bits, but good enough for our needs in this
+// package. This implementation is not thread-safe.
+class Random {
+ public:
+  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
+    // Avoid bad seeds.
+    if (seed_ == 0 || seed_ == random_internal::M) {
+      seed_ = 1;
+    }
+  }
+
+  // Next pseudo-random 32-bit unsigned integer.
+  // FIXME: This currently only generates 31 bits of randomness.
+  // The MSB will always be zero.
+  uint32_t Next() {
+    static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
+    // We are computing
+    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
+    //
+    // seed_ must not be zero or M, or else all subsequent computed values
+    // will be zero or M respectively.  For all other values, seed_ will end
+    // up cycling through every number in [1,M-1]
+    uint64_t product = seed_ * A;
+
+    // Compute (product % M) using the fact that ((x << 31) % M) == x.
+    seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M));
+    // The first reduction may overflow by 1 bit, so we may need to
+    // repeat.  mod == M is not possible; using > allows the faster
+    // sign-bit-based test.
+    if (seed_ > random_internal::M) {
+      seed_ -= random_internal::M;
+    }
+    return seed_;
+  }
+
+  // Alias for consistency with Next64
+  uint32_t Next32() { return Next(); }
+
+  // Next pseudo-random 64-bit unsigned integer.
+  // FIXME: This currently only generates 62 bits of randomness due to Next()
+  // only giving 31 bits of randomness. The 2 most significant bits will always
+  // be zero.
+  uint64_t Next64() {
+    uint64_t large = Next();
+    // Only shift by 31 bits so we end up with zeros in MSB and not scattered
+    // throughout the 64-bit word. This is due to the weakness in Next() noted
+    // above.
+    large <<= 31;
+    large |= Next();
+    return large;
+  }
+
+  // Returns a uniformly distributed value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint32_t Uniform(uint32_t n) { return Next() % n; }
+
+  // Alias for consistency with Uniform64
+  uint32_t Uniform32(uint32_t n) { return Uniform(n); }
+
+  // Returns a uniformly distributed 64-bit value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint64_t Uniform64(uint64_t n) { return Next64() % n; }
+
+  // Randomly returns true ~"1/n" of the time, and false otherwise.
+  // REQUIRES: n > 0
+  bool OneIn(int n) { return (Next() % n) == 0; }
+
+  // Skewed: pick "base" uniformly from range [0,max_log] and then
+  // return "base" random bits.  The effect is to pick a number in the
+  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+  uint32_t Skewed(int max_log) {
+    return Uniform(1 << Uniform(max_log + 1));
+  }
+
+  // Creates a normal distribution variable using the
+  // Box-Muller transform. See:
+  // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
+  // Adapted from WebRTC source code at:
+  // webrtc/trunk/modules/video_coding/main/test/test_util.cc
+  double Normal(double mean, double std_dev) {
+    double uniform1 = (Next() + 1.0) / (random_internal::M + 1.0);
+    double uniform2 = (Next() + 1.0) / (random_internal::M + 1.0);
+    return (mean + std_dev * sqrt(-2 * ::log(uniform1)) *
+        cos(random_internal::kTwoPi * uniform2));
+  }
+
+  // Return a random number between 0.0 and 1.0 inclusive.
+  double NextDoubleFraction() {
+    return Next() / static_cast<double>(random_internal::M + 1.0);
+  }
+
+ private:
+  uint32_t seed_;
+};
+
+
+uint32_t random_seed() {
+  // TODO: use system time to get a reasonably random seed
+  return 0;
+}
+
+
+} // namespace arrow
+
+#endif  // ARROW_UTIL_RANDOM_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
new file mode 100644
index 0000000..c64b8a3
--- /dev/null
+++ b/cpp/src/arrow/util/status.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation.  It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#include "arrow/util/status.h"
+
+#include <assert.h>
+
+namespace arrow {
+
+Status::Status(StatusCode code, const std::string& msg, int16_t posix_code) {
+  assert(code != StatusCode::OK);
+  const uint32_t size = msg.size();
+  char* result = new char[size + 7];
+  memcpy(result, &size, sizeof(size));
+  result[4] = static_cast<char>(code);
+  memcpy(result + 5, &posix_code, sizeof(posix_code));
+  memcpy(result + 7, msg.c_str(), msg.size());
+  state_ = result;
+}
+
+const char* Status::CopyState(const char* state) {
+  uint32_t size;
+  memcpy(&size, state, sizeof(size));
+  char* result = new char[size + 7];
+  memcpy(result, state, size + 7);
+  return result;
+}
+
+} // namespace arrow


Mime
View raw message