arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [5/5] arrow git commit: ARROW-418: [C++] Array / Builder class code reorganization, flattening
Date Mon, 12 Dec 2016 22:17:54 GMT
ARROW-418: [C++] Array / Builder class code reorganization, flattening

I've been wanting to do this for a while -- it feels cleaner to me. I am also going to promote modules from arrow/util to the top level. I'm open to other ideas, too.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #236 from wesm/ARROW-418 and squashes the following commits:

6f556ea [Wes McKinney] Add missing math.h include for clang
9dc2e22 [Wes McKinney] Fix remaining old includes
6f7ae77 [Wes McKinney] Fixes, cpplint
66ac3f7 [Wes McKinney] Promote buffer.h/status.h/memory-pool.h to top level directory
8cdf059 [Wes McKinney] Consolidate Array and Builder classes in array.h, builder.h. Remove arrow/types subdirectory


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2c10d7cc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2c10d7cc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2c10d7cc

Branch: refs/heads/master
Commit: 2c10d7ccec3c07fb061e1988be16aecaf9916af4
Parents: 73fe556
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Dec 12 17:17:31 2016 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Dec 12 17:17:31 2016 -0500

----------------------------------------------------------------------
 cpp/CMakeLists.txt                         |  15 +-
 cpp/src/arrow/CMakeLists.txt               |  11 +
 cpp/src/arrow/api.h                        |  13 +-
 cpp/src/arrow/array-decimal-test.cc        |  40 ++
 cpp/src/arrow/array-list-test.cc           | 237 ++++++++++++
 cpp/src/arrow/array-primitive-test.cc      | 476 +++++++++++++++++++++++
 cpp/src/arrow/array-string-test.cc         | 358 ++++++++++++++++++
 cpp/src/arrow/array-struct-test.cc         | 391 +++++++++++++++++++
 cpp/src/arrow/array-test.cc                |   5 +-
 cpp/src/arrow/array.cc                     | 443 +++++++++++++++++++++-
 cpp/src/arrow/array.h                      | 373 +++++++++++++++++-
 cpp/src/arrow/buffer-test.cc               | 140 +++++++
 cpp/src/arrow/buffer.cc                    | 102 +++++
 cpp/src/arrow/buffer.h                     | 232 ++++++++++++
 cpp/src/arrow/builder.cc                   | 329 +++++++++++++++-
 cpp/src/arrow/builder.h                    | 315 +++++++++++++++-
 cpp/src/arrow/column-benchmark.cc          |   4 +-
 cpp/src/arrow/column-test.cc               |   1 -
 cpp/src/arrow/column.cc                    |   2 +-
 cpp/src/arrow/io/file.cc                   |   6 +-
 cpp/src/arrow/io/hdfs.cc                   |   6 +-
 cpp/src/arrow/io/interfaces.cc             |   4 +-
 cpp/src/arrow/io/io-file-test.cc           |   2 +-
 cpp/src/arrow/io/io-hdfs-test.cc           |   2 +-
 cpp/src/arrow/io/libhdfs_shim.cc           |   2 +-
 cpp/src/arrow/io/memory.cc                 |   5 +-
 cpp/src/arrow/io/test-common.h             |   4 +-
 cpp/src/arrow/ipc/adapter.cc               |   9 +-
 cpp/src/arrow/ipc/file.cc                  |   4 +-
 cpp/src/arrow/ipc/ipc-adapter-test.cc      |  10 +-
 cpp/src/arrow/ipc/ipc-file-test.cc         |  11 +-
 cpp/src/arrow/ipc/ipc-json-test.cc         |  18 +-
 cpp/src/arrow/ipc/ipc-metadata-test.cc     |   2 +-
 cpp/src/arrow/ipc/json-integration-test.cc |   2 +-
 cpp/src/arrow/ipc/json-internal.cc         |  10 +-
 cpp/src/arrow/ipc/json.cc                  |   6 +-
 cpp/src/arrow/ipc/metadata-internal.cc     |   4 +-
 cpp/src/arrow/ipc/metadata.cc              |   4 +-
 cpp/src/arrow/ipc/test-common.h            |   9 +-
 cpp/src/arrow/ipc/util.h                   |   2 +-
 cpp/src/arrow/memory_pool-test.cc          |  69 ++++
 cpp/src/arrow/memory_pool.cc               | 111 ++++++
 cpp/src/arrow/memory_pool.h                |  43 +++
 cpp/src/arrow/pretty_print-test.cc         |   5 +-
 cpp/src/arrow/pretty_print.cc              |   5 +-
 cpp/src/arrow/status-test.cc               |  38 ++
 cpp/src/arrow/status.cc                    |  86 +++++
 cpp/src/arrow/status.h                     | 192 ++++++++++
 cpp/src/arrow/table-test.cc                |   4 +-
 cpp/src/arrow/table.cc                     |   2 +-
 cpp/src/arrow/test-util.h                  |  43 ++-
 cpp/src/arrow/type.cc                      |   8 +-
 cpp/src/arrow/type.h                       |   2 +-
 cpp/src/arrow/types/CMakeLists.txt         |  39 --
 cpp/src/arrow/types/construct.cc           | 124 ------
 cpp/src/arrow/types/construct.h            |  47 ---
 cpp/src/arrow/types/datetime.h             |  27 --
 cpp/src/arrow/types/decimal-test.cc        |  40 --
 cpp/src/arrow/types/decimal.cc             |  31 --
 cpp/src/arrow/types/decimal.h              |  28 --
 cpp/src/arrow/types/list-test.cc           | 241 ------------
 cpp/src/arrow/types/list.cc                | 162 --------
 cpp/src/arrow/types/list.h                 | 170 ---------
 cpp/src/arrow/types/primitive-test.cc      | 478 ------------------------
 cpp/src/arrow/types/primitive.cc           | 294 ---------------
 cpp/src/arrow/types/primitive.h            | 371 ------------------
 cpp/src/arrow/types/string-test.cc         | 360 ------------------
 cpp/src/arrow/types/string.cc              | 150 --------
 cpp/src/arrow/types/string.h               | 149 --------
 cpp/src/arrow/types/struct-test.cc         | 396 --------------------
 cpp/src/arrow/types/struct.cc              | 108 ------
 cpp/src/arrow/types/struct.h               | 116 ------
 cpp/src/arrow/types/test-common.h          |  70 ----
 cpp/src/arrow/types/union.cc               |  27 --
 cpp/src/arrow/types/union.h                |  48 ---
 cpp/src/arrow/util/CMakeLists.txt          |   6 -
 cpp/src/arrow/util/bit-util.cc             |   4 +-
 cpp/src/arrow/util/buffer-test.cc          | 140 -------
 cpp/src/arrow/util/buffer.cc               | 102 -----
 cpp/src/arrow/util/buffer.h                | 232 ------------
 cpp/src/arrow/util/memory-pool-test.cc     |  69 ----
 cpp/src/arrow/util/memory-pool.cc          | 111 ------
 cpp/src/arrow/util/memory-pool.h           |  43 ---
 cpp/src/arrow/util/status-test.cc          |  38 --
 cpp/src/arrow/util/status.cc               |  86 -----
 cpp/src/arrow/util/status.h                | 192 ----------
 python/src/pyarrow/adapters/builtin.cc     |   2 +-
 python/src/pyarrow/adapters/pandas.cc      |   2 +-
 python/src/pyarrow/common.cc               |   4 +-
 python/src/pyarrow/common.h                |   5 +-
 python/src/pyarrow/io.cc                   |   4 +-
 91 files changed, 4103 insertions(+), 4630 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 798d75f..adcca0e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -743,25 +743,17 @@ set(ARROW_PRIVATE_LINK_LIBS
 
 set(ARROW_SRCS
   src/arrow/array.cc
+  src/arrow/buffer.cc
   src/arrow/builder.cc
   src/arrow/column.cc
+  src/arrow/memory_pool.cc
   src/arrow/pretty_print.cc
   src/arrow/schema.cc
+  src/arrow/status.cc
   src/arrow/table.cc
   src/arrow/type.cc
 
-  src/arrow/types/construct.cc
-  src/arrow/types/decimal.cc
-  src/arrow/types/list.cc
-  src/arrow/types/primitive.cc
-  src/arrow/types/string.cc
-  src/arrow/types/struct.cc
-  src/arrow/types/union.cc
-
   src/arrow/util/bit-util.cc
-  src/arrow/util/buffer.cc
-  src/arrow/util/memory-pool.cc
-  src/arrow/util/status.cc
 )
 
 add_library(arrow_objlib OBJECT
@@ -823,7 +815,6 @@ endif()
 add_subdirectory(src/arrow)
 add_subdirectory(src/arrow/io)
 add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/types)
 
 #----------------------------------------------------------------------
 # IPC library

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 6c0dea2..7d7bc29 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -20,9 +20,12 @@ install(FILES
   api.h
   array.h
   column.h
+  buffer.h
   builder.h
+  memory_pool.h
   pretty_print.h
   schema.h
+  status.h
   table.h
   type.h
   type_fwd.h
@@ -37,9 +40,17 @@ install(FILES
 set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
 
 ADD_ARROW_TEST(array-test)
+ADD_ARROW_TEST(array-decimal-test)
+ADD_ARROW_TEST(array-list-test)
+ADD_ARROW_TEST(array-primitive-test)
+ADD_ARROW_TEST(array-string-test)
+ADD_ARROW_TEST(array-struct-test)
+ADD_ARROW_TEST(buffer-test)
 ADD_ARROW_TEST(column-test)
+ADD_ARROW_TEST(memory_pool-test)
 ADD_ARROW_TEST(pretty_print-test)
 ADD_ARROW_TEST(schema-test)
+ADD_ARROW_TEST(status-test)
 ADD_ARROW_TEST(table-test)
 
 ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 2d317b4..51437d8 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -21,20 +21,13 @@
 #define ARROW_API_H
 
 #include "arrow/array.h"
+#include "arrow/buffer.h"
 #include "arrow/builder.h"
 #include "arrow/column.h"
+#include "arrow/memory_pool.h"
 #include "arrow/schema.h"
+#include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/type.h"
 
-#include "arrow/types/construct.h"
-#include "arrow/types/list.h"
-#include "arrow/types/primitive.h"
-#include "arrow/types/string.h"
-#include "arrow/types/struct.h"
-
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-#include "arrow/util/status.h"
-
 #endif  // ARROW_API_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc
new file mode 100644
index 0000000..9e00fd9
--- /dev/null
+++ b/cpp/src/arrow/array-decimal-test.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+TEST(TypesTest, TestDecimalType) {
+  DecimalType t1(8, 4);
+
+  ASSERT_EQ(t1.type, Type::DECIMAL);
+  ASSERT_EQ(t1.precision, 8);
+  ASSERT_EQ(t1.scale, 4);
+
+  ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)"));
+
+  // Test copy constructor
+  DecimalType t2 = t1;
+  ASSERT_EQ(t2.type, Type::DECIMAL);
+  ASSERT_EQ(t2.precision, 8);
+  ASSERT_EQ(t2.scale, 4);
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
new file mode 100644
index 0000000..8baaf06
--- /dev/null
+++ b/cpp/src/arrow/array-list-test.cc
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+TEST(TypesTest, TestListType) {
+  std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
+
+  ListType list_type(vt);
+  ASSERT_EQ(list_type.type, Type::LIST);
+
+  ASSERT_EQ(list_type.name(), string("list"));
+  ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
+
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
+
+  std::shared_ptr<DataType> st = std::make_shared<StringType>();
+  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+  ASSERT_EQ(lt->ToString(), string("list<item: string>"));
+
+  ListType lt2(lt);
+  ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
+}
+
+// ----------------------------------------------------------------------
+// List tests
+
+class TestListBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    value_type_ = TypePtr(new Int32Type());
+    type_ = TypePtr(new ListType(value_type_));
+
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
+  }
+
+  void Done() {
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+    result_ = std::dynamic_pointer_cast<ListArray>(out);
+  }
+
+ protected:
+  TypePtr value_type_;
+  TypePtr type_;
+
+  shared_ptr<ListBuilder> builder_;
+  shared_ptr<ListArray> result_;
+};
+
+TEST_F(TestListBuilder, Equality) {
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+
+  ArrayPtr array, equal_array, unequal_array;
+  vector<int32_t> equal_offsets = {0, 1, 2, 5};
+  vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2};
+  vector<int32_t> unequal_offsets = {0, 1, 4};
+  vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5};
+
+  // setup two equal arrays
+  ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
+  ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
+
+  ASSERT_OK(builder_->Finish(&array));
+  ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
+  ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
+
+  ASSERT_OK(builder_->Finish(&equal_array));
+  // now an unequal one
+  ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size()));
+  ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size()));
+
+  ASSERT_OK(builder_->Finish(&unequal_array));
+
+  // Test array equality
+  EXPECT_TRUE(array->Equals(array));
+  EXPECT_TRUE(array->Equals(equal_array));
+  EXPECT_TRUE(equal_array->Equals(array));
+  EXPECT_FALSE(equal_array->Equals(unequal_array));
+  EXPECT_FALSE(unequal_array->Equals(equal_array));
+
+  // Test range equality
+  EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array));
+  EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array));
+  EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
+  EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array));
+  EXPECT_TRUE(array->RangeEquals(3, 4, 1, unequal_array));
+}
+
+TEST_F(TestListBuilder, TestResize) {}
+
+TEST_F(TestListBuilder, TestAppendNull) {
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_OK(builder_->AppendNull());
+
+  Done();
+
+  ASSERT_OK(result_->Validate());
+  ASSERT_TRUE(result_->IsNull(0));
+  ASSERT_TRUE(result_->IsNull(1));
+
+  ASSERT_EQ(0, result_->raw_offsets()[0]);
+  ASSERT_EQ(0, result_->offset(1));
+  ASSERT_EQ(0, result_->offset(2));
+
+  Int32Array* values = static_cast<Int32Array*>(result_->values().get());
+  ASSERT_EQ(0, values->length());
+}
+
+void ValidateBasicListArray(const ListArray* result, const vector<int32_t>& values,
+    const vector<uint8_t>& is_valid) {
+  ASSERT_OK(result->Validate());
+  ASSERT_EQ(1, result->null_count());
+  ASSERT_EQ(0, result->values()->null_count());
+
+  ASSERT_EQ(3, result->length());
+  vector<int32_t> ex_offsets = {0, 3, 3, 7};
+  for (size_t i = 0; i < ex_offsets.size(); ++i) {
+    ASSERT_EQ(ex_offsets[i], result->offset(i));
+  }
+
+  for (int i = 0; i < result->length(); ++i) {
+    ASSERT_EQ(!static_cast<bool>(is_valid[i]), result->IsNull(i));
+  }
+
+  ASSERT_EQ(7, result->values()->length());
+  Int32Array* varr = static_cast<Int32Array*>(result->values().get());
+
+  for (size_t i = 0; i < values.size(); ++i) {
+    ASSERT_EQ(values[i], varr->Value(i));
+  }
+}
+
+TEST_F(TestListBuilder, TestBasics) {
+  vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+  vector<int> lengths = {3, 0, 4};
+  vector<uint8_t> is_valid = {1, 0, 1};
+
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+
+  ASSERT_OK(builder_->Reserve(lengths.size()));
+  ASSERT_OK(vb->Reserve(values.size()));
+
+  int pos = 0;
+  for (size_t i = 0; i < lengths.size(); ++i) {
+    ASSERT_OK(builder_->Append(is_valid[i] > 0));
+    for (int j = 0; j < lengths[i]; ++j) {
+      vb->Append(values[pos++]);
+    }
+  }
+
+  Done();
+  ValidateBasicListArray(result_.get(), values, is_valid);
+}
+
+TEST_F(TestListBuilder, BulkAppend) {
+  vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+  vector<int> lengths = {3, 0, 4};
+  vector<uint8_t> is_valid = {1, 0, 1};
+  vector<int32_t> offsets = {0, 3, 3};
+
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+  ASSERT_OK(vb->Reserve(values.size()));
+
+  builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+  for (int32_t value : values) {
+    vb->Append(value);
+  }
+  Done();
+  ValidateBasicListArray(result_.get(), values, is_valid);
+}
+
+TEST_F(TestListBuilder, BulkAppendInvalid) {
+  vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+  vector<int> lengths = {3, 0, 4};
+  vector<uint8_t> is_null = {0, 1, 0};
+  vector<uint8_t> is_valid = {1, 0, 1};
+  vector<int32_t> offsets = {0, 2, 4};  // should be 0, 3, 3 given the is_null array
+
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+  ASSERT_OK(vb->Reserve(values.size()));
+
+  builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+  builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+  for (int32_t value : values) {
+    vb->Append(value);
+  }
+
+  Done();
+  ASSERT_RAISES(Invalid, result_->Validate());
+}
+
+TEST_F(TestListBuilder, TestZeroLength) {
+  // All buffers are null
+  Done();
+  ASSERT_OK(result_->Validate());
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc
new file mode 100644
index 0000000..a10e240
--- /dev/null
+++ b/cpp/src/arrow/array-primitive-test.cc
@@ -0,0 +1,476 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+
+using std::string;
+using std::shared_ptr;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+class Array;
+
+#define PRIMITIVE_TEST(KLASS, ENUM, NAME)   \
+  TEST(TypesTest, TestPrimitive_##ENUM) {   \
+    KLASS tp;                               \
+                                            \
+    ASSERT_EQ(tp.type, Type::ENUM);         \
+    ASSERT_EQ(tp.ToString(), string(NAME)); \
+                                            \
+    KLASS tp_copy = tp;                     \
+    ASSERT_EQ(tp_copy.type, Type::ENUM);    \
+  }
+
+PRIMITIVE_TEST(Int8Type, INT8, "int8");
+PRIMITIVE_TEST(Int16Type, INT16, "int16");
+PRIMITIVE_TEST(Int32Type, INT32, "int32");
+PRIMITIVE_TEST(Int64Type, INT64, "int64");
+PRIMITIVE_TEST(UInt8Type, UINT8, "uint8");
+PRIMITIVE_TEST(UInt16Type, UINT16, "uint16");
+PRIMITIVE_TEST(UInt32Type, UINT32, "uint32");
+PRIMITIVE_TEST(UInt64Type, UINT64, "uint64");
+
+PRIMITIVE_TEST(FloatType, FLOAT, "float");
+PRIMITIVE_TEST(DoubleType, DOUBLE, "double");
+
+PRIMITIVE_TEST(BooleanType, BOOL, "bool");
+
+// ----------------------------------------------------------------------
+// Primitive type tests
+
+TEST_F(TestBuilder, TestReserve) {
+  builder_->Init(10);
+  ASSERT_EQ(2, builder_->null_bitmap()->size());
+
+  builder_->Reserve(30);
+  ASSERT_EQ(4, builder_->null_bitmap()->size());
+}
+
+template <typename Attrs>
+class TestPrimitiveBuilder : public TestBuilder {
+ public:
+  typedef typename Attrs::ArrayType ArrayType;
+  typedef typename Attrs::BuilderType BuilderType;
+  typedef typename Attrs::T T;
+  typedef typename Attrs::Type Type;
+
+  virtual void SetUp() {
+    TestBuilder::SetUp();
+
+    type_ = Attrs::type();
+
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_ = std::dynamic_pointer_cast<BuilderType>(tmp);
+
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_nn_ = std::dynamic_pointer_cast<BuilderType>(tmp);
+  }
+
+  void RandomData(int N, double pct_null = 0.1) {
+    Attrs::draw(N, &draws_);
+
+    valid_bytes_.resize(N);
+    test::random_null_bytes(N, pct_null, valid_bytes_.data());
+  }
+
+  void Check(const std::shared_ptr<BuilderType>& builder, bool nullable) {
+    int size = builder->length();
+
+    auto ex_data = std::make_shared<Buffer>(
+        reinterpret_cast<uint8_t*>(draws_.data()), size * sizeof(T));
+
+    std::shared_ptr<Buffer> ex_null_bitmap;
+    int32_t ex_null_count = 0;
+
+    if (nullable) {
+      ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+      ex_null_count = test::null_count(valid_bytes_);
+    } else {
+      ex_null_bitmap = nullptr;
+    }
+
+    auto expected =
+        std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_null_bitmap);
+
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder->Finish(&out));
+
+    std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
+
+    // Builder is now reset
+    ASSERT_EQ(0, builder->length());
+    ASSERT_EQ(0, builder->capacity());
+    ASSERT_EQ(0, builder->null_count());
+    ASSERT_EQ(nullptr, builder->data());
+
+    ASSERT_EQ(ex_null_count, result->null_count());
+    ASSERT_TRUE(result->EqualsExact(*expected.get()));
+  }
+
+ protected:
+  std::shared_ptr<DataType> type_;
+  shared_ptr<BuilderType> builder_;
+  shared_ptr<BuilderType> builder_nn_;
+
+  vector<T> draws_;
+  vector<uint8_t> valid_bytes_;
+};
+
+#define PTYPE_DECL(CapType, c_type)               \
+  typedef CapType##Array ArrayType;               \
+  typedef CapType##Builder BuilderType;           \
+  typedef CapType##Type Type;                     \
+  typedef c_type T;                               \
+                                                  \
+  static std::shared_ptr<DataType> type() {       \
+    return std::shared_ptr<DataType>(new Type()); \
+  }
+
+#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
+  struct P##CapType {                            \
+    PTYPE_DECL(CapType, c_type);                 \
+    static void draw(int N, vector<T>* draws) {  \
+      test::randint<T>(N, LOWER, UPPER, draws);  \
+    }                                            \
+  }
+
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)     \
+  struct P##CapType {                                  \
+    PTYPE_DECL(CapType, c_type);                       \
+    static void draw(int N, vector<T>* draws) {        \
+      test::random_real<T>(N, 0, LOWER, UPPER, draws); \
+    }                                                  \
+  }
+
+PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
+PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
+PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
+PINT_DECL(UInt64, uint64_t, 0, UINT64_MAX);
+
+PINT_DECL(Int8, int8_t, INT8_MIN, INT8_MAX);
+PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
+PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
+PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
+
+PFLOAT_DECL(Float, float, -1000, 1000);
+PFLOAT_DECL(Double, double, -1000, 1000);
+
+struct PBoolean {
+  PTYPE_DECL(Boolean, uint8_t);
+};
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::RandomData(int N, double pct_null) {
+  draws_.resize(N);
+  valid_bytes_.resize(N);
+
+  test::random_null_bytes(N, 0.5, draws_.data());
+  test::random_null_bytes(N, pct_null, valid_bytes_.data());
+}
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::Check(
+    const std::shared_ptr<BooleanBuilder>& builder, bool nullable) {
+  int size = builder->length();
+
+  auto ex_data = test::bytes_to_null_buffer(draws_);
+
+  std::shared_ptr<Buffer> ex_null_bitmap;
+  int32_t ex_null_count = 0;
+
+  if (nullable) {
+    ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+    ex_null_count = test::null_count(valid_bytes_);
+  } else {
+    ex_null_bitmap = nullptr;
+  }
+
+  auto expected =
+      std::make_shared<BooleanArray>(size, ex_data, ex_null_count, ex_null_bitmap);
+
+  std::shared_ptr<Array> out;
+  ASSERT_OK(builder->Finish(&out));
+  std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
+
+  // Builder is now reset
+  ASSERT_EQ(0, builder->length());
+  ASSERT_EQ(0, builder->capacity());
+  ASSERT_EQ(0, builder->null_count());
+  ASSERT_EQ(nullptr, builder->data());
+
+  ASSERT_EQ(ex_null_count, result->null_count());
+
+  ASSERT_EQ(expected->length(), result->length());
+
+  for (int i = 0; i < result->length(); ++i) {
+    if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
+    bool actual = BitUtil::GetBit(result->raw_data(), i);
+    ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
+  }
+  ASSERT_TRUE(result->EqualsExact(*expected.get()));
+}
+
+typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
+    PInt32, PInt64, PFloat, PDouble>
+    Primitives;
+
+TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
+
+#define DECL_T() typedef typename TestFixture::T T;
+
+#define DECL_TYPE() typedef typename TestFixture::Type Type;
+
+#define DECL_ARRAYTYPE() typedef typename TestFixture::ArrayType ArrayType;
+
+TYPED_TEST(TestPrimitiveBuilder, TestInit) {
+  DECL_TYPE();
+
+  int n = 1000;
+  ASSERT_OK(this->builder_->Reserve(n));
+  ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(TypeTraits<Type>::bytes_required(n)),
+      this->builder_->data()->size());
+
+  // unsure if this should go in all builder classes
+  ASSERT_EQ(0, this->builder_->num_children());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
+  int size = 1000;
+  for (int i = 0; i < size; ++i) {
+    ASSERT_OK(this->builder_->AppendNull());
+  }
+
+  std::shared_ptr<Array> result;
+  ASSERT_OK(this->builder_->Finish(&result));
+
+  for (int i = 0; i < size; ++i) {
+    ASSERT_TRUE(result->IsNull(i)) << i;
+  }
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
+  DECL_T();
+
+  int size = 1000;
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+  int64_t memory_before = this->pool_->bytes_allocated();
+
+  this->RandomData(size);
+
+  this->builder_->Reserve(size);
+
+  int i;
+  for (i = 0; i < size; ++i) {
+    if (valid_bytes[i] > 0) {
+      this->builder_->Append(draws[i]);
+    } else {
+      this->builder_->AppendNull();
+    }
+  }
+
+  do {
+    std::shared_ptr<Array> result;
+    ASSERT_OK(this->builder_->Finish(&result));
+  } while (false);
+
+  ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, Equality) {
+  DECL_T();
+
+  const int size = 1000;
+  this->RandomData(size);
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& valid_bytes = this->valid_bytes_;
+  ArrayPtr array, equal_array, unequal_array;
+  auto builder = this->builder_.get();
+  ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array));
+  ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array));
+
+  // Make the not equal array by negating the first valid element with itself.
+  const auto first_valid = std::find_if(
+      valid_bytes.begin(), valid_bytes.end(), [](uint8_t valid) { return valid > 0; });
+  const int first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
+  // This should be true with a very high probability, but might introduce flakiness
+  ASSERT_LT(first_valid_idx, size - 1);
+  draws[first_valid_idx] = ~*reinterpret_cast<int64_t*>(&draws[first_valid_idx]);
+  ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array));
+
+  // test normal equality
+  EXPECT_TRUE(array->Equals(array));
+  EXPECT_TRUE(array->Equals(equal_array));
+  EXPECT_TRUE(equal_array->Equals(array));
+  EXPECT_FALSE(equal_array->Equals(unequal_array));
+  EXPECT_FALSE(unequal_array->Equals(equal_array));
+
+  // Test range equality
+  EXPECT_FALSE(array->RangeEquals(0, first_valid_idx + 1, 0, unequal_array));
+  EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array));
+  EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array));
+  EXPECT_TRUE(
+      array->RangeEquals(first_valid_idx + 1, size, first_valid_idx + 1, unequal_array));
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
+  DECL_T();
+
+  const int size = 10000;
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+  this->RandomData(size);
+
+  this->builder_->Reserve(1000);
+  this->builder_nn_->Reserve(1000);
+
+  int i;
+  int null_count = 0;
+  // Append the first 1000
+  for (i = 0; i < 1000; ++i) {
+    if (valid_bytes[i] > 0) {
+      this->builder_->Append(draws[i]);
+    } else {
+      this->builder_->AppendNull();
+      ++null_count;
+    }
+    this->builder_nn_->Append(draws[i]);
+  }
+
+  ASSERT_EQ(null_count, this->builder_->null_count());
+
+  ASSERT_EQ(1000, this->builder_->length());
+  ASSERT_EQ(1024, this->builder_->capacity());
+
+  ASSERT_EQ(1000, this->builder_nn_->length());
+  ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+  this->builder_->Reserve(size - 1000);
+  this->builder_nn_->Reserve(size - 1000);
+
+  // Append the next 9000
+  for (i = 1000; i < size; ++i) {
+    if (valid_bytes[i] > 0) {
+      this->builder_->Append(draws[i]);
+    } else {
+      this->builder_->AppendNull();
+    }
+    this->builder_nn_->Append(draws[i]);
+  }
+
+  ASSERT_EQ(size, this->builder_->length());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+
+  ASSERT_EQ(size, this->builder_nn_->length());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
+
+  this->Check(this->builder_, true);
+  this->Check(this->builder_nn_, false);
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
+  DECL_T();
+
+  int size = 10000;
+  this->RandomData(size);
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+  // first slug
+  int K = 1000;
+
+  ASSERT_OK(this->builder_->Append(draws.data(), K, valid_bytes.data()));
+  ASSERT_OK(this->builder_nn_->Append(draws.data(), K));
+
+  ASSERT_EQ(1000, this->builder_->length());
+  ASSERT_EQ(1024, this->builder_->capacity());
+
+  ASSERT_EQ(1000, this->builder_nn_->length());
+  ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+  // Append the next 9000
+  ASSERT_OK(this->builder_->Append(draws.data() + K, size - K, valid_bytes.data() + K));
+  ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));
+
+  ASSERT_EQ(size, this->builder_->length());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+
+  this->Check(this->builder_, true);
+  this->Check(this->builder_nn_, false);
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
+  int n = 1000;
+  ASSERT_OK(this->builder_->Reserve(n));
+
+  ASSERT_OK(this->builder_->Advance(100));
+  ASSERT_EQ(100, this->builder_->length());
+
+  ASSERT_OK(this->builder_->Advance(900));
+
+  int too_many = this->builder_->capacity() - 1000 + 1;
+  ASSERT_RAISES(Invalid, this->builder_->Advance(too_many));
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestResize) {
+  DECL_TYPE();
+
+  int cap = kMinBuilderCapacity * 2;
+
+  ASSERT_OK(this->builder_->Reserve(cap));
+  ASSERT_EQ(cap, this->builder_->capacity());
+
+  ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
+  ASSERT_EQ(BitUtil::BytesForBits(cap), this->builder_->null_bitmap()->size());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
+  ASSERT_OK(this->builder_->Reserve(10));
+  ASSERT_EQ(0, this->builder_->length());
+  ASSERT_EQ(kMinBuilderCapacity, this->builder_->capacity());
+
+  ASSERT_OK(this->builder_->Reserve(90));
+  ASSERT_OK(this->builder_->Advance(100));
+  ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
+
+  ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-string-test.cc b/cpp/src/arrow/array-string-test.cc
new file mode 100644
index 0000000..b144c63
--- /dev/null
+++ b/cpp/src/arrow/array-string-test.cc
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+namespace arrow {
+
+class Buffer;
+
+TEST(TypesTest, BinaryType) {
+  // Two binary types compare equal; a string type does not equal a binary type.
+  BinaryType binary;
+  BinaryType same_binary;
+  StringType string_type;
+  EXPECT_TRUE(binary.Equals(&same_binary));
+  EXPECT_FALSE(binary.Equals(&string_type));
+
+  ASSERT_EQ(binary.type, Type::BINARY);
+  const std::string expected_name("binary");
+  ASSERT_EQ(binary.ToString(), expected_name);
+}
+
+TEST(TypesTest, TestStringType) {
+  // Checks the type id and the printable name of the string type.
+  StringType string_type;
+  ASSERT_EQ(string_type.type, Type::STRING);
+  const std::string expected_name("string");
+  ASSERT_EQ(string_type.ToString(), expected_name);
+}
+
+// ----------------------------------------------------------------------
+// String container
+
+// Fixture holding a hand-assembled StringArray of five slots, one of them null.
+class TestStringContainer : public ::testing::Test {
+ public:
+  // Fills in the expected strings with their raw characters, offsets and
+  // validity bytes, then builds the array under test.
+  void SetUp() {
+    expected_ = {"a", "", "", "bb", "ccc"};
+    valid_bytes_ = {1, 1, 0, 1, 1};
+    offsets_ = {0, 1, 1, 1, 3, 6};
+    chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
+
+    MakeArray();
+  }
+
+  // (Re)creates the buffers and strings_ from the current vectors.
+  void MakeArray() {
+    // There is one more offset than there are values.
+    length_ = static_cast<int>(offsets_.size() - 1);
+
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+
+    null_count_ = test::null_count(valid_bytes_);
+    null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
+
+    strings_ = std::make_shared<StringArray>(
+        length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+  }
+
+ protected:
+  // Raw inputs the array is built from.
+  std::vector<int32_t> offsets_;
+  std::vector<char> chars_;
+  std::vector<uint8_t> valid_bytes_;
+
+  // Strings the tests compare the array contents against.
+  std::vector<std::string> expected_;
+
+  // Buffers created from the raw inputs above.
+  std::shared_ptr<Buffer> value_buf_;
+  std::shared_ptr<Buffer> offsets_buf_;
+  std::shared_ptr<Buffer> null_bitmap_;
+
+  int null_count_;
+  int length_;
+
+  // The array under test.
+  std::shared_ptr<StringArray> strings_;
+};
+
+TEST_F(TestStringContainer, TestArrayBasics) {
+  // Length and null count match the fixture data, and the array validates.
+  const StringArray& array = *strings_;
+  ASSERT_EQ(length_, array.length());
+  ASSERT_EQ(1, array.null_count());
+  ASSERT_OK(array.Validate());
+}
+
+TEST_F(TestStringContainer, TestType) {
+  // The type object and the type_enum() shortcut must both report STRING.
+  ASSERT_EQ(Type::STRING, strings_->type()->type);
+  ASSERT_EQ(Type::STRING, strings_->type_enum());
+}
+
+TEST_F(TestStringContainer, TestListFunctions) {
+  // Walk the expected strings, tracking the offset at which each one starts.
+  int expected_offset = 0;
+  int index = 0;
+  for (const std::string& value : expected_) {
+    const int value_size = static_cast<int>(value.size());
+    ASSERT_EQ(expected_offset, strings_->value_offset(index));
+    ASSERT_EQ(value_size, strings_->value_length(index));
+    expected_offset += value_size;
+    ++index;
+  }
+}
+
+TEST_F(TestStringContainer, TestDestructor) {
+  // Nothing is asserted: a second array over the fixture's buffers is created
+  // and released again when it goes out of scope.
+  std::shared_ptr<StringArray> arr = std::make_shared<StringArray>(
+      length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+}
+
+TEST_F(TestStringContainer, TestGetString) {
+  // Valid slots must return the expected string; the others must read as null.
+  for (size_t i = 0; i < expected_.size(); ++i) {
+    if (valid_bytes_[i] != 0) {
+      ASSERT_EQ(expected_[i], strings_->GetString(i));
+      continue;
+    }
+    ASSERT_TRUE(strings_->IsNull(i));
+  }
+}
+
+TEST_F(TestStringContainer, TestEmptyStringComparison) {
+  // All-zero offsets: every value is empty, and no value buffer is supplied.
+  offsets_ = {0, 0, 0, 0, 0, 0};
+  length_ = static_cast<int>(offsets_.size() - 1);
+  offsets_buf_ = test::GetBufferFromVector(offsets_);
+
+  std::shared_ptr<StringArray> lhs = std::make_shared<StringArray>(
+      length_, offsets_buf_, nullptr, null_count_, null_bitmap_);
+  std::shared_ptr<StringArray> rhs = std::make_shared<StringArray>(
+      length_, offsets_buf_, nullptr, null_count_, null_bitmap_);
+  ASSERT_TRUE(lhs->Equals(rhs));
+}
+
+// ----------------------------------------------------------------------
+// String builder tests
+
+// Fixture that owns a StringBuilder and the StringArray it produces.
+class TestStringBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+    type_ = TypePtr(new StringType());
+    builder_.reset(new StringBuilder(pool_, type_));
+  }
+
+  // Finishes the builder, stores the outcome in result_ and validates it.
+  void Done() {
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<StringArray>(out);
+    // Fail the test here rather than dereferencing a null pointer below.
+    ASSERT_TRUE(result_ != nullptr);
+    // Validate() reports problems through its Status; discarding it would let
+    // an invalid array go unnoticed.
+    EXPECT_OK(result_->Validate());
+  }
+
+ protected:
+  TypePtr type_;
+
+  std::unique_ptr<StringBuilder> builder_;
+  std::shared_ptr<StringArray> result_;
+};
+
+TEST_F(TestStringBuilder, TestScalarAppend) {
+  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = strings.size();
+  int reps = 1000;
+
+  // Append the five-slot pattern `reps` times. Each append returns a Status;
+  // check it so a failed append stops the test at the point of failure.
+  for (int j = 0; j < reps; ++j) {
+    for (int i = 0; i < N; ++i) {
+      if (is_null[i]) {
+        ASSERT_OK(builder_->AppendNull());
+      } else {
+        ASSERT_OK(builder_->Append(strings[i]));
+      }
+    }
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  // Each repetition contributes 6 characters ("bb" + "a" + "ccc").
+  ASSERT_EQ(reps * 6, result_->data()->size());
+
+  int32_t length;
+  int32_t pos = 0;  // running offset of the next non-null value
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->offset(i));
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    }
+  }
+}
+
+TEST_F(TestStringBuilder, TestZeroLength) {
+  // Nothing is appended before finishing, so all buffers are null
+  Done();
+}
+
+// Binary container type
+// TODO(emkornfield) there should be some way to refactor these to avoid duplicating
+// code with String
+class TestBinaryContainer : public ::testing::Test {
+ public:
+  // Fills in the expected values with their raw bytes, offsets and validity
+  // bytes, then builds the array under test.
+  void SetUp() {
+    expected_ = {"a", "", "", "bb", "ccc"};
+    valid_bytes_ = {1, 1, 0, 1, 1};
+    offsets_ = {0, 1, 1, 1, 3, 6};
+    chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
+
+    MakeArray();
+  }
+
+  // (Re)creates the buffers and strings_ from the current vectors.
+  void MakeArray() {
+    // There is one more offset than there are values.
+    length_ = static_cast<int>(offsets_.size() - 1);
+
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+
+    null_count_ = test::null_count(valid_bytes_);
+    null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
+
+    strings_ = std::make_shared<BinaryArray>(
+        length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+  }
+
+ protected:
+  // Raw inputs the array is built from.
+  std::vector<int32_t> offsets_;
+  std::vector<char> chars_;
+  std::vector<uint8_t> valid_bytes_;
+
+  // Values the tests compare the array contents against.
+  std::vector<std::string> expected_;
+
+  // Buffers created from the raw inputs above.
+  std::shared_ptr<Buffer> value_buf_;
+  std::shared_ptr<Buffer> offsets_buf_;
+  std::shared_ptr<Buffer> null_bitmap_;
+
+  int null_count_;
+  int length_;
+
+  // The array under test.
+  std::shared_ptr<BinaryArray> strings_;
+};
+
+TEST_F(TestBinaryContainer, TestArrayBasics) {
+  // Length and null count match the fixture data, and the array validates.
+  const BinaryArray& array = *strings_;
+  ASSERT_EQ(length_, array.length());
+  ASSERT_EQ(1, array.null_count());
+  ASSERT_OK(array.Validate());
+}
+
+TEST_F(TestBinaryContainer, TestType) {
+  // The type object and the type_enum() shortcut must both report BINARY.
+  ASSERT_EQ(Type::BINARY, strings_->type()->type);
+  ASSERT_EQ(Type::BINARY, strings_->type_enum());
+}
+
+TEST_F(TestBinaryContainer, TestListFunctions) {
+  // Walk the expected values, tracking the offset at which each one starts.
+  int expected_offset = 0;
+  int index = 0;
+  for (const std::string& value : expected_) {
+    const int value_size = static_cast<int>(value.size());
+    ASSERT_EQ(expected_offset, strings_->value_offset(index));
+    ASSERT_EQ(value_size, strings_->value_length(index));
+    expected_offset += value_size;
+    ++index;
+  }
+}
+
+TEST_F(TestBinaryContainer, TestDestructor) {
+  // Nothing is asserted: a second array over the fixture's buffers is created
+  // and released again when it goes out of scope.
+  std::shared_ptr<BinaryArray> arr = std::make_shared<BinaryArray>(
+      length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+}
+
+TEST_F(TestBinaryContainer, TestGetValue) {
+  // Valid slots must return exactly the expected bytes; the others must read
+  // as null.
+  for (size_t i = 0; i < expected_.size(); ++i) {
+    if (valid_bytes_[i] == 0) {
+      ASSERT_TRUE(strings_->IsNull(i));
+    } else {
+      int32_t len = -1;
+      const uint8_t* bytes = strings_->GetValue(i, &len);
+      // Check the length first: memcmp over `len` bytes alone only proves a
+      // prefix match and proves nothing at all for empty values.
+      ASSERT_EQ(static_cast<int32_t>(expected_[i].size()), len);
+      ASSERT_EQ(0, std::memcmp(expected_[i].data(), bytes, len));
+    }
+  }
+}
+
+// Fixture that owns a BinaryBuilder and the BinaryArray it produces.
+class TestBinaryBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+    type_ = TypePtr(new BinaryType());
+    builder_.reset(new BinaryBuilder(pool_, type_));
+  }
+
+  // Finishes the builder, stores the outcome in result_ and validates it.
+  void Done() {
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<BinaryArray>(out);
+    // Fail the test here rather than dereferencing a null pointer below.
+    ASSERT_TRUE(result_ != nullptr);
+    // Validate() reports problems through its Status; discarding it would let
+    // an invalid array go unnoticed.
+    EXPECT_OK(result_->Validate());
+  }
+
+ protected:
+  TypePtr type_;
+
+  std::unique_ptr<BinaryBuilder> builder_;
+  std::shared_ptr<BinaryArray> result_;
+};
+
+TEST_F(TestBinaryBuilder, TestScalarAppend) {
+  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = strings.size();
+  int reps = 1000;
+
+  // Append the five-slot pattern `reps` times. Each append returns a Status;
+  // check it so a failed append stops the test at the point of failure.
+  for (int j = 0; j < reps; ++j) {
+    for (int i = 0; i < N; ++i) {
+      if (is_null[i]) {
+        ASSERT_OK(builder_->AppendNull());
+      } else {
+        ASSERT_OK(builder_->Append(
+            reinterpret_cast<const uint8_t*>(strings[i].data()), strings[i].size()));
+      }
+    }
+  }
+  Done();
+  ASSERT_OK(result_->Validate());
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  // Each repetition contributes 6 bytes ("bb" + "a" + "ccc").
+  ASSERT_EQ(reps * 6, result_->data()->size());
+
+  int32_t length;
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      const uint8_t* vals = result_->GetValue(i, &length);
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
+    }
+  }
+}
+
+TEST_F(TestBinaryBuilder, TestZeroLength) {
+  // Nothing is appended before finishing, so all buffers are null
+  Done();
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-struct-test.cc b/cpp/src/arrow/array-struct-test.cc
new file mode 100644
index 0000000..58386fe
--- /dev/null
+++ b/cpp/src/arrow/array-struct-test.cc
@@ -0,0 +1,391 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+namespace arrow {
+
+TEST(TestStructType, Basics) {
+  // Three fields of different types make up the struct type under test.
+  auto f0 = std::make_shared<Field>("f0", TypePtr(new Int32Type()));
+  auto f1 = std::make_shared<Field>("f1", TypePtr(new StringType()));
+  auto f2 = std::make_shared<Field>("f2", TypePtr(new UInt8Type()));
+
+  vector<shared_ptr<Field>> fields;
+  fields.push_back(f0);
+  fields.push_back(f1);
+  fields.push_back(f2);
+
+  StructType struct_type(fields);
+
+  // Children come back in declaration order.
+  ASSERT_TRUE(struct_type.child(0)->Equals(f0));
+  ASSERT_TRUE(struct_type.child(1)->Equals(f1));
+  ASSERT_TRUE(struct_type.child(2)->Equals(f2));
+
+  ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
+
+  // TODO(wesm): out of bounds for field(...)
+}
+
+// Checks a four-row struct array whose first child is a list of int8 and whose
+// second child is int32: lengths, null counts, list offsets and the stored
+// values are compared against the given vectors. struct_is_valid,
+// list_is_valid and list_lengths are accepted but not inspected.
+void ValidateBasicStructArray(const StructArray* result,
+    const vector<uint8_t>& struct_is_valid, const vector<char>& list_values,
+    const vector<uint8_t>& list_is_valid, const vector<int>& list_lengths,
+    const vector<int>& list_offsets, const vector<int32_t>& int_values) {
+  ASSERT_EQ(4, result->length());
+  ASSERT_OK(result->Validate());
+
+  ListArray* list_child = static_cast<ListArray*>(result->field(0).get());
+  Int8Array* char_child = static_cast<Int8Array*>(list_child->values().get());
+  Int32Array* int_child = static_cast<Int32Array*>(result->field(1).get());
+
+  ASSERT_EQ(0, result->null_count());
+  ASSERT_EQ(1, list_child->null_count());
+  ASSERT_EQ(0, int_child->null_count());
+
+  // List<char>
+  ASSERT_EQ(4, list_child->length());
+  ASSERT_EQ(10, list_child->values()->length());
+  size_t offset_index = 0;
+  for (const int expected_offset : list_offsets) {
+    ASSERT_EQ(expected_offset, list_child->raw_offsets()[offset_index]);
+    ++offset_index;
+  }
+  size_t char_index = 0;
+  for (const char expected_char : list_values) {
+    ASSERT_EQ(expected_char, char_child->Value(char_index));
+    ++char_index;
+  }
+
+  // Int32
+  ASSERT_EQ(4, int_child->length());
+  size_t int_index = 0;
+  for (const int32_t expected_int : int_values) {
+    ASSERT_EQ(expected_int, int_child->Value(int_index));
+    ++int_index;
+  }
+}
+
+// ----------------------------------------------------------------------------------
+// Struct test
+// Fixture that owns a StructBuilder for struct<list: list<int8>, int: int32>
+// and the StructArray it produces.
+class TestStructBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    auto int32_type = TypePtr(new Int32Type());
+    auto char_type = TypePtr(new Int8Type());
+    auto list_type = TypePtr(new ListType(char_type));
+
+    std::vector<FieldPtr> fields;
+    fields.push_back(FieldPtr(new Field("list", list_type)));
+    fields.push_back(FieldPtr(new Field("int", int32_type)));
+
+    type_ = TypePtr(new StructType(fields));
+    value_fields_ = fields;
+
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+
+    builder_ = std::dynamic_pointer_cast<StructBuilder>(tmp);
+    // Fail the test here rather than dereferencing a null pointer below.
+    ASSERT_TRUE(builder_ != nullptr);
+    ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+  }
+
+  // Finishes the builder and stores the outcome in result_.
+  void Done() {
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder_->Finish(&out));
+    result_ = std::dynamic_pointer_cast<StructArray>(out);
+    // Every caller dereferences result_ right after Done().
+    ASSERT_TRUE(result_ != nullptr);
+  }
+
+ protected:
+  std::vector<FieldPtr> value_fields_;
+  TypePtr type_;
+
+  std::shared_ptr<StructBuilder> builder_;
+  std::shared_ptr<StructArray> result_;
+};
+
+TEST_F(TestStructBuilder, TestAppendNull) {
+  // Two null struct slots ...
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+
+  // ... and two matching null slots in each child builder.
+  auto* list_builder = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  ASSERT_OK(list_builder->AppendNull());
+  ASSERT_OK(list_builder->AppendNull());
+  ASSERT_EQ(2, list_builder->length());
+
+  auto* int_builder = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_OK(int_builder->AppendNull());
+  ASSERT_OK(int_builder->AppendNull());
+  ASSERT_EQ(2, int_builder->length());
+
+  Done();
+
+  ASSERT_OK(result_->Validate());
+
+  const StructArray& structs = *result_;
+  ASSERT_EQ(2, static_cast<int>(structs.fields().size()));
+  ASSERT_EQ(2, structs.length());
+  ASSERT_EQ(2, structs.field(0)->length());
+  ASSERT_EQ(2, structs.field(1)->length());
+
+  // Both the parent and the children report every slot as null.
+  ASSERT_TRUE(structs.IsNull(0));
+  ASSERT_TRUE(structs.IsNull(1));
+  ASSERT_TRUE(structs.field(0)->IsNull(0));
+  ASSERT_TRUE(structs.field(0)->IsNull(1));
+  ASSERT_TRUE(structs.field(1)->IsNull(0));
+  ASSERT_TRUE(structs.field(1)->IsNull(1));
+
+  ASSERT_EQ(Type::LIST, structs.field(0)->type_enum());
+  ASSERT_EQ(Type::INT32, structs.field(1)->type_enum());
+}
+
+TEST_F(TestStructBuilder, TestBasics) {
+  // Four rows: a list of chars (the second list is null) plus an int32 per row.
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6, 10};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  auto* list_builder = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  auto* char_builder = static_cast<Int8Builder*>(list_builder->value_builder().get());
+  auto* int_builder = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+
+  EXPECT_OK(builder_->Resize(list_lengths.size()));
+  EXPECT_OK(char_builder->Resize(list_values.size()));
+  EXPECT_OK(int_builder->Resize(int_values.size()));
+
+  // Fill the children row by row; next_char walks the flattened list values.
+  int next_char = 0;
+  for (size_t row = 0; row < list_lengths.size(); ++row) {
+    ASSERT_OK(list_builder->Append(list_is_valid[row] > 0));
+    int_builder->UnsafeAppend(int_values[row]);
+    const int row_length = list_lengths[row];
+    for (int k = 0; k < row_length; ++k) {
+      char_builder->UnsafeAppend(list_values[next_char]);
+      ++next_char;
+    }
+  }
+
+  // Then mark the struct slots themselves.
+  for (size_t row = 0; row < struct_is_valid.size(); ++row) {
+    ASSERT_OK(builder_->Append(struct_is_valid[row] > 0));
+  }
+
+  Done();
+
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+TEST_F(TestStructBuilder, BulkAppend) {
+  // Same data as TestBasics, but appended with the bulk Append overloads.
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // The bulk appends return a Status; check it instead of dropping it.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+TEST_F(TestStructBuilder, BulkAppendInvalid) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 0, 1, 1};  // should be 1, 1, 1, 1
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // The bulk appends return a Status; check it instead of dropping it.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  // Even though the null bitmap of the parent struct is not the intended one,
+  // Validate() ignores it.
+  ASSERT_OK(result_->Validate());
+}
+
+TEST_F(TestStructBuilder, TestEquality) {
+  // Builds five arrays with the same builder (a reference array, an equal copy,
+  // and three that differ in bitmap, offsets and values respectively), then
+  // checks Equals() and RangeEquals() between them. Every bulk Append returns a
+  // Status, which is checked so a failed append stops the test immediately.
+  ArrayPtr array, equal_array;
+  ArrayPtr unequal_bitmap_array, unequal_offsets_array, unequal_values_array;
+
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  vector<int32_t> unequal_int_values = {4, 2, 3, 1};
+  vector<char> unequal_list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'l', 'u', 'c', 'y'};
+  vector<int> unequal_list_offsets = {0, 3, 4, 6};
+  vector<uint8_t> unequal_list_is_valid = {1, 1, 1, 1};
+  vector<uint8_t> unequal_struct_is_valid = {1, 0, 0, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // set up two equal arrays, one of which takes an unequal bitmap
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  ASSERT_OK(builder_->Finish(&array));
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  ASSERT_OK(builder_->Finish(&equal_array));
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // set up an unequal one with the unequal bitmap
+  ASSERT_OK(
+      builder_->Append(unequal_struct_is_valid.size(), unequal_struct_is_valid.data()));
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  ASSERT_OK(builder_->Finish(&unequal_bitmap_array));
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // set up an unequal one with unequal offsets
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  ASSERT_OK(list_vb->Append(unequal_list_offsets.data(), unequal_list_offsets.size(),
+      unequal_list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  ASSERT_OK(builder_->Finish(&unequal_offsets_array));
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // set up an unequal one with unequal values
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : unequal_list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : unequal_int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  ASSERT_OK(builder_->Finish(&unequal_values_array));
+
+  // Test array equality
+  EXPECT_TRUE(array->Equals(array));
+  EXPECT_TRUE(array->Equals(equal_array));
+  EXPECT_TRUE(equal_array->Equals(array));
+  EXPECT_FALSE(equal_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(equal_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_values_array));
+  EXPECT_FALSE(unequal_values_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_offsets_array));
+  EXPECT_FALSE(unequal_offsets_array->Equals(unequal_bitmap_array));
+
+  // Test range equality
+  EXPECT_TRUE(array->RangeEquals(0, 4, 0, equal_array));
+  EXPECT_TRUE(array->RangeEquals(3, 4, 3, unequal_bitmap_array));
+  EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 1, 0, unequal_values_array));
+  EXPECT_TRUE(array->RangeEquals(1, 3, 1, unequal_values_array));
+  EXPECT_FALSE(array->RangeEquals(3, 4, 3, unequal_values_array));
+}
+
+TEST_F(TestStructBuilder, TestZeroLength) {
+  // Nothing is appended before finishing, so all buffers are null
+  Done();
+  ASSERT_OK(result_->Validate());
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 1581244..783104e 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -24,11 +24,10 @@
 #include "gtest/gtest.h"
 
 #include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/primitive.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
 
 namespace arrow {
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 1f0bb66..7ab61f5 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -19,10 +19,13 @@
 
 #include <cstdint>
 #include <cstring>
+#include <sstream>
 
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/status.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
@@ -85,4 +88,440 @@ Status NullArray::Accept(ArrayVisitor* visitor) const {
   return visitor->Visit(*this);
 }
 
+// ----------------------------------------------------------------------
+// Primitive array base
+
+// Stores the value buffer and caches a raw pointer to its bytes; the raw pointer is
+// left null when no value buffer is supplied.
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : Array(type, length, null_count, null_bitmap) {
+  data_ = data;
+  if (data_) {
+    raw_data_ = data_->data();
+  } else {
+    raw_data_ = nullptr;
+  }
+}
+
+// Slot-by-slot equality for two primitive arrays of the same fixed-width type.
+// Null slots compare equal regardless of the bytes stored underneath them.
+bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
+  if (this == &other) { return true; }
+  // Without this check two arrays could compare equal on a common prefix, or the
+  // loop below could read past the end of the shorter one.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+  if (length_ == 0) { return true; }
+
+  auto size_meta = dynamic_cast<const FixedWidthType*>(type_.get());
+  DCHECK(size_meta != nullptr);
+  const int value_byte_size = size_meta->bit_width() / 8;
+  DCHECK_GT(value_byte_size, 0);
+
+  if (null_count_ > 0) {
+    const bool equal_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
+    if (!equal_bitmap) { return false; }
+
+    const uint8_t* this_data = raw_data_;
+    const uint8_t* other_data = other.raw_data_;
+
+    // Only valid slots are compared; bytes under a null slot are unspecified.
+    for (int32_t i = 0; i < length_; ++i) {
+      if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; }
+      this_data += value_byte_size;
+      other_data += value_byte_size;
+    }
+    return true;
+  }
+
+  // Buffer::Equals takes a byte count, so the slot count must be scaled by the value
+  // width; passing length_ alone compares only a prefix of the data.
+  return data_->Equals(*other.data_, static_cast<int64_t>(length_) * value_byte_size);
+}
+
+// Type-checked entry point: arrays are equal when they are the same object, or when
+// they share a type id and EqualsExact agrees.
+bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
+  const Array* rhs = arr.get();
+  if (rhs == this) { return true; }
+  if (rhs == nullptr) { return false; }
+  if (rhs->type_enum() != this->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const PrimitiveArray*>(rhs));
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+template <typename T>
+Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {
+  const NumericArray<T>& self = *this;
+  return visitor->Visit(self);
+}
+
+// Explicit instantiations of NumericArray for the types handled in this file.
+// Signed integers
+template class NumericArray<Int8Type>;
+template class NumericArray<Int16Type>;
+template class NumericArray<Int32Type>;
+template class NumericArray<Int64Type>;
+// Unsigned integers
+template class NumericArray<UInt8Type>;
+template class NumericArray<UInt16Type>;
+template class NumericArray<UInt32Type>;
+template class NumericArray<UInt64Type>;
+// Floating point
+template class NumericArray<HalfFloatType>;
+template class NumericArray<FloatType>;
+template class NumericArray<DoubleType>;
+// Temporal
+template class NumericArray<TimestampType>;
+
+// ----------------------------------------------------------------------
+// BooleanArray
+
+// Convenience constructor: builds a fresh BooleanType and delegates to the
+// type-taking constructor below.
+BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
+    : BooleanArray(
+          std::make_shared<BooleanType>(), length, data, null_count, null_bitmap) {}
+
+// Constructor taking an explicit type; all state is held by PrimitiveArray.
+BooleanArray::BooleanArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : PrimitiveArray(type, length, data, null_count, null_bitmap) {}
+
+// Bit-by-bit equality for two boolean arrays. Values under null slots are ignored,
+// and so are the padding bits past length_ in the last byte of the value bitmap.
+bool BooleanArray::EqualsExact(const BooleanArray& other) const {
+  if (this == &other) { return true; }
+  // Arrays of different length can never be equal; checking first also keeps the
+  // loops below inside both arrays.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+  // Empty arrays may carry no value buffer at all, so do not touch data_ for them.
+  if (length_ == 0) { return true; }
+
+  const uint8_t* this_data = raw_data_;
+  const uint8_t* other_data = other.raw_data_;
+
+  if (null_count_ > 0) {
+    const bool equal_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
+    if (!equal_bitmap) { return false; }
+
+    for (int32_t i = 0; i < length_; ++i) {
+      if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Both buffers must hold every bit of the array before any of them is read.
+  const int64_t nbytes = BitUtil::BytesForBits(length_);
+  if (data_->size() < nbytes || other.data_->size() < nbytes) { return false; }
+
+  // Compare the fully used bytes in bulk, then only the meaningful bits of the last
+  // byte: a byte-wise compare of that byte would also compare padding bits.
+  const int32_t whole_bytes = length_ / 8;
+  if (whole_bytes > 0 && !data_->Equals(*other.data_, whole_bytes)) { return false; }
+  for (int32_t i = whole_bytes * 8; i < length_; ++i) {
+    if (BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) { return false; }
+  }
+  return true;
+}
+
+// Type-checked equality: true for the same object, or for another boolean array
+// that EqualsExact accepts. A null pointer is never equal.
+bool BooleanArray::Equals(const ArrayPtr& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before dereferencing, as PrimitiveArray::Equals does.
+  if (!arr) { return false; }
+  if (Type::BOOL != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const BooleanArray*>(arr.get()));
+}
+
+// Compares slots [start_idx, end_idx) of this array with the same number of slots of
+// arr starting at other_start_idx. Nullness must match; values are compared only for
+// valid slots.
+bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx,
+    int32_t other_start_idx, const ArrayPtr& arr) const {
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // The same object is trivially equal only when both ranges start at the same slot;
+  // different starting slots in one array still need a real comparison.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  const auto other = static_cast<const BooleanArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (!is_null && Value(i) != other->Value(o_i)) { return false; }
+  }
+  return true;
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+Status BooleanArray::Accept(ArrayVisitor* visitor) const {
+  const BooleanArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// ListArray
+
+// Equality for two list arrays: same length and null count, identical offsets,
+// identical validity bitmap (when there are nulls) and equal child values.
+bool ListArray::EqualsExact(const ListArray& other) const {
+  if (this == &other) { return true; }
+  // Without the length check, an array with an extra trailing empty list would share
+  // the first length_ + 1 offsets and the same values, and wrongly compare equal.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+
+  const bool equal_offsets =
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
+  if (!equal_offsets) { return false; }
+
+  if (null_count_ > 0) {
+    const bool equal_null_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
+    if (!equal_null_bitmap) { return false; }
+  }
+
+  return values()->Equals(other.values());
+}
+
+// Type-checked equality: true for the same object, or for another list array that
+// EqualsExact accepts. A null pointer is never equal.
+bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before dereferencing, as the other Equals overrides in this file do.
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const ListArray*>(arr.get()));
+}
+
+// Compares list slots [start_idx, end_idx) of this array with the same number of
+// slots of arr starting at other_start_idx. Nullness must match; valid slots must
+// have the same size and equal child values.
+bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // The same object is trivially equal only when both ranges start at the same slot;
+  // different starting slots in one array still need a real comparison.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  const auto other = static_cast<const ListArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    const int32_t begin_offset = offset(i);
+    const int32_t end_offset = offset(i + 1);
+    const int32_t other_begin_offset = other->offset(o_i);
+    const int32_t other_end_offset = other->offset(o_i + 1);
+    // Underlying can't be equal if the size isn't equal
+    if (end_offset - begin_offset != other_end_offset - other_begin_offset) {
+      return false;
+    }
+    if (!values_->RangeEquals(
+            begin_offset, end_offset, other_begin_offset, other->values())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Checks the structural invariants of a list array:
+//  - the length is non-negative and the offset buffer holds length_ + 1 offsets,
+//  - the first offset is zero and offsets never decrease,
+//  - a null slot does not advance the offset,
+//  - when the final offset is non-zero, the child array exists, has exactly that
+//    many elements and is itself valid.
+// Returns Status::Invalid describing the first violation found.
+Status ListArray::Validate() const {
+  if (length_ < 0) { return Status::Invalid("Length was negative"); }
+  if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); }
+  // A list array of length N needs N + 1 offsets; offset(length_) is read below, so
+  // a buffer with only N entries must be rejected here.
+  const int64_t num_offsets =
+      offset_buffer_->size() / static_cast<int64_t>(sizeof(int32_t));
+  if (num_offsets < static_cast<int64_t>(length_) + 1) {
+    std::stringstream ss;
+    ss << "offset buffer size (bytes): " << offset_buffer_->size()
+       << " isn't large enough for length: " << length_;
+    return Status::Invalid(ss.str());
+  }
+  const int32_t last_offset = offset(length_);
+  if (last_offset > 0) {
+    if (!values_) {
+      return Status::Invalid("last offset was non-zero and values was null");
+    }
+    if (values_->length() != last_offset) {
+      std::stringstream ss;
+      ss << "Final offset invariant not equal to values length: " << last_offset
+         << "!=" << values_->length();
+      return Status::Invalid(ss.str());
+    }
+
+    const Status child_valid = values_->Validate();
+    if (!child_valid.ok()) {
+      std::stringstream ss;
+      ss << "Child array invalid: " << child_valid.ToString();
+      return Status::Invalid(ss.str());
+    }
+  }
+
+  int32_t prev_offset = offset(0);
+  if (prev_offset != 0) { return Status::Invalid("The first offset wasn't zero"); }
+  for (int32_t i = 1; i <= length_; ++i) {
+    const int32_t current_offset = offset(i);
+    if (IsNull(i - 1) && current_offset != prev_offset) {
+      std::stringstream ss;
+      // The separator keeps the offset value from running into the message text.
+      ss << "Offset invariant failure at: " << i
+         << " inconsistent offsets for null slot: " << current_offset
+         << "!=" << prev_offset;
+      return Status::Invalid(ss.str());
+    }
+    if (current_offset < prev_offset) {
+      std::stringstream ss;
+      ss << "Offset invariant failure: " << i
+         << " inconsistent offset for non-null slot: " << current_offset << "<"
+         << prev_offset;
+      return Status::Invalid(ss.str());
+    }
+    prev_offset = current_offset;
+  }
+  return Status::OK();
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+Status ListArray::Accept(ArrayVisitor* visitor) const {
+  const ListArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// String and binary
+
+// Type instances shared by the BinaryArray / StringArray constructors that take no
+// explicit type. Declared const so they cannot be reseated after initialization.
+static const std::shared_ptr<DataType> kBinary = std::make_shared<BinaryType>();
+static const std::shared_ptr<DataType> kString = std::make_shared<StringType>();
+
+// Convenience constructor: uses the shared binary type instance.
+BinaryArray::BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {}
+
+// Stores the offsets and data buffers and caches raw pointers into them. Either
+// cached pointer stays null when the corresponding buffer is null.
+BinaryArray::BinaryArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
+    : Array(type, length, null_count, null_bitmap),
+      offset_buffer_(offsets),
+      // Read from the parameter, not the member: this does not rely on member
+      // declaration order, and a null offsets buffer no longer crashes construction.
+      offsets_(offsets ? reinterpret_cast<const int32_t*>(offsets->data()) : nullptr),
+      data_buffer_(data),
+      data_(nullptr) {
+  if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); }
+}
+
+// Placeholder: no invariants are checked yet, so every array reports OK.
+Status BinaryArray::Validate() const {
+  // TODO(wesm): what to do here?
+  return Status::OK();
+}
+
+// Equality for two binary arrays: the base-class comparison must pass, the offsets
+// must be identical, and the value bytes addressed by those offsets must match.
+bool BinaryArray::EqualsExact(const BinaryArray& other) const {
+  if (!Array::EqualsExact(other)) { return false; }
+
+  const bool equal_offsets =
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
+  if (!equal_offsets) { return false; }
+
+  // The offsets are identical from here on, so the final offset is the number of
+  // value bytes both arrays refer to.
+  const int32_t value_bytes = offset(length_);
+
+  // A missing data buffer is acceptable only when no bytes are referenced; this also
+  // avoids dereferencing a null buffer when just one side has data.
+  if (!data_buffer_ || !other.data_buffer_) { return value_bytes == 0; }
+
+  // Compare only the bytes up to the final offset; any spare capacity beyond it is
+  // not part of the array's contents.
+  return data_buffer_->Equals(*other.data_buffer_, value_bytes);
+}
+
+// Type-checked equality: true for the same object, or for another array of the same
+// type id that EqualsExact accepts. A null pointer is never equal.
+bool BinaryArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before dereferencing, as the other Equals overrides in this file do.
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const BinaryArray*>(arr.get()));
+}
+
+// Compares slots [start_idx, end_idx) of this array with the same number of slots of
+// arr starting at other_start_idx. Nullness must match; valid slots must have the
+// same byte length and identical bytes.
+bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // The same object is trivially equal only when both ranges start at the same slot;
+  // different starting slots in one array still need a real comparison.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  const auto other = static_cast<const BinaryArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    const int32_t begin_offset = offset(i);
+    const int32_t end_offset = offset(i + 1);
+    const int32_t other_begin_offset = other->offset(o_i);
+    const int32_t other_end_offset = other->offset(o_i + 1);
+    // Underlying can't be equal if the size isn't equal
+    const int32_t value_size = end_offset - begin_offset;
+    if (value_size != other_end_offset - other_begin_offset) { return false; }
+
+    // Empty values are equal by definition; skipping them also avoids handing
+    // memcmp a null data pointer when an array has no data buffer.
+    if (value_size == 0) { continue; }
+    if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset,
+            value_size)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+Status BinaryArray::Accept(ArrayVisitor* visitor) const {
+  const BinaryArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// Builds a string array on top of BinaryArray, tagged with the shared string type.
+StringArray::StringArray(int32_t length,
+    const std::shared_ptr<Buffer>& offsets,
+    const std::shared_ptr<Buffer>& data,
+    int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {}
+
+// Currently performs only the BinaryArray checks.
+Status StringArray::Validate() const {
+  // TODO(emkornfield) Validate proper UTF8 code points?
+  const Status base_status = BinaryArray::Validate();
+  return base_status;
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+Status StringArray::Accept(ArrayVisitor* visitor) const {
+  const StringArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// Struct
+
+// Returns the child array at position pos. In debug builds pos is checked against
+// the number of fields; in release builds an out-of-range pos is undefined.
+std::shared_ptr<Array> StructArray::field(int32_t pos) const {
+  // Check the index itself: a non-empty field list does not make pos valid.
+  DCHECK_GE(pos, 0);
+  DCHECK_LT(static_cast<size_t>(pos), field_arrays_.size());
+  return field_arrays_[pos];
+}
+
+// Whole-array equality for structs: same type id, length and null count, then a
+// slot-by-slot comparison over the full range.
+bool StructArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // RangeEquals below walks length_ slots of both arrays, so a shorter arr would be
+  // read out of bounds and a longer one could match on a prefix.
+  if (length_ != arr->length()) { return false; }
+  if (null_count_ != arr->null_count()) { return false; }
+  return RangeEquals(0, length_, 0, arr);
+}
+
+// Compares struct slots [start_idx, end_idx) of this array with the same number of
+// slots of arr starting at other_start_idx. Nullness must match; for valid slots
+// every child field must compare equal at the corresponding position.
+bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  if (!arr) { return false; }
+  if (Type::STRUCT != arr->type_enum()) { return false; }
+  // The same object is trivially equal only when both ranges start at the same slot;
+  // different starting slots in one array still need a real comparison.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  const auto other = static_cast<const StructArray*>(arr.get());
+  // Structs with a different number of children can't be equal, and indexing the
+  // other array's fields with this array's count would run out of bounds.
+  if (field_arrays_.size() != other->field_arrays_.size()) { return false; }
+
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    for (size_t j = 0; j < field_arrays_.size(); ++j) {
+      // TODO: really we should be comparing stretches of non-null data rather
+      // than looking at one value at a time.
+      if (!field_arrays_[j]->RangeEquals(i, i + 1, o_i, other->field_arrays_[j])) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Checks the structural invariants of a struct array:
+//  - the length is non-negative and the null count does not exceed it,
+//  - every child array is present, valid, and as long as the first child,
+//  - a non-zero child length equals the struct's own length.
+// Returns Status::Invalid describing the first violation found.
+Status StructArray::Validate() const {
+  if (length_ < 0) { return Status::Invalid("Length was negative"); }
+
+  if (null_count() > length_) {
+    return Status::Invalid("Null count exceeds the length of this struct");
+  }
+
+  if (field_arrays_.size() > 0) {
+    // A missing child would be dereferenced below; report it instead of crashing.
+    for (size_t i = 0; i < field_arrays_.size(); ++i) {
+      if (!field_arrays_[i]) {
+        std::stringstream ss;
+        ss << "Child array was null at position {" << i << "}";
+        return Status::Invalid(ss.str());
+      }
+    }
+
+    // Validate fields
+    const int32_t array_length = field_arrays_[0]->length();
+    size_t idx = 0;
+    // Iterate by reference: copying each shared_ptr only adds refcount traffic.
+    for (const auto& child : field_arrays_) {
+      if (child->length() != array_length) {
+        std::stringstream ss;
+        ss << "Length is not equal from field " << child->type()->ToString()
+           << " at position {" << idx << "}";
+        return Status::Invalid(ss.str());
+      }
+
+      const Status child_valid = child->Validate();
+      if (!child_valid.ok()) {
+        std::stringstream ss;
+        ss << "Child array invalid: " << child_valid.ToString() << " at position {" << idx
+           << "}";
+        return Status::Invalid(ss.str());
+      }
+      ++idx;
+    }
+
+    if (array_length > 0 && array_length != length_) {
+      return Status::Invalid("Struct's length is not equal to its child arrays");
+    }
+  }
+  return Status::OK();
+}
+
+// Passes this array to visitor->Visit and returns the resulting Status.
+Status StructArray::Accept(ArrayVisitor* visitor) const {
+  const StructArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+
+// Expands to one switch case that builds ArrayType from the arguments of
+// MakePrimitiveArray and stores it in *out. make_shared is used instead of a raw
+// `new` so the object and its control block share a single allocation.
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType)                                     \
+  case Type::ENUM:                                                                     \
+    *out = std::make_shared<ArrayType>(type, length, data, null_count, null_bitmap);   \
+    break;
+
+// Creates the concrete primitive array class matching type->type and stores it in
+// *out. Returns NotImplemented (carrying the type's string form) for type ids with
+// no case below. In builds without NDEBUG the new array is also validated and that
+// Status is returned; with NDEBUG the result is OK once the array is constructed.
+Status MakePrimitiveArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
+  switch (type->type) {
+    MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
+    // TIME and TIMESTAMP_DOUBLE use the plain 64-bit integer / double array classes.
+    MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray);
+    default:
+      return Status::NotImplemented(type->ToString());
+  }
+#ifdef NDEBUG
+  return Status::OK();
+#else
+  return (*out)->Validate();
+#endif
+}
+
 }  // namespace arrow


Mime
View raw message