arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-60: [C++] Struct type builder API
Date Tue, 07 Jun 2016 06:48:36 GMT
Repository: arrow
Updated Branches:
  refs/heads/master ce2fe7a78 -> 9ce13a067


ARROW-60: [C++] Struct type builder API

Implement the basic classes, `StructArray` and `StructBuilder`, and add the respective test cases for them.

Other necessary methods will be added subsequently.

Author: fengguangyuan <root@node20.(none)>

Closes #66 from fengguangyuan/ARROW-60 and squashes the following commits:

190967f [fengguangyuan] ARROW-60: [C++] Struct type builder API Add field index and TODO comment.
ae74c80 [fengguangyuan] ARROW-60: Struct type builder API Add RangeEquals method to implement
Equals method.
fa856fd [fengguangyuan] ARROW-60:[C++] Struct typebuilder API Modify Validate() refered to
the specification.
bfabdc1 [fengguangyuan] ARROW-60: Struct type builder API Refine the previous committed patch.
Add validate methods for testing StructArray and StructBuilder. TODO, Equals methods also
need to be tested, but now it's not convient to do it.
5733de7 [fengguangyuan] ARROW-60: Struct type builder API


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9ce13a06
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9ce13a06
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9ce13a06

Branch: refs/heads/master
Commit: 9ce13a06726874c04433100127f74e6ea4afa855
Parents: ce2fe7a
Author: fengguangyuan <root@node20.(none)>
Authored: Mon Jun 6 23:32:38 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Mon Jun 6 23:32:38 2016 -0700

----------------------------------------------------------------------
 cpp/src/arrow/type.h               |   1 +
 cpp/src/arrow/types/construct.cc   |  15 ++
 cpp/src/arrow/types/construct.h    |   3 +-
 cpp/src/arrow/types/struct-test.cc | 332 ++++++++++++++++++++++++++++++++
 cpp/src/arrow/types/struct.cc      |  72 ++++++-
 cpp/src/arrow/types/struct.h       |  97 +++++++++-
 6 files changed, 517 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 77404cd..f366645 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -161,6 +161,7 @@ struct Field {
 
   std::string ToString() const;
 };
+typedef std::shared_ptr<Field> FieldPtr;
 
 template <typename Derived>
 struct PrimitiveType : public DataType {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 78036d4..bcb0ec4 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -23,6 +23,7 @@
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
+#include "arrow/types/struct.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/status.h"
 
@@ -66,6 +67,20 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
       out->reset(new ListBuilder(pool, value_builder));
       return Status::OK();
     }
+
+    case Type::STRUCT: {
+      std::vector<FieldPtr>& fields = type->children_;
+      std::vector<std::shared_ptr<ArrayBuilder>> values_builder;
+
+      for (auto it : fields) {
+        std::shared_ptr<ArrayBuilder> builder;
+        RETURN_NOT_OK(MakeBuilder(pool, it->type, &builder));
+        values_builder.push_back(builder);
+      }
+      out->reset(new StructBuilder(pool, type, values_builder));
+      return Status::OK();
+    }
+
     default:
       return Status::NotImplemented(type->ToString());
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index 43c0018..d037084 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -20,13 +20,14 @@
 
 #include <cstdint>
 #include <memory>
-
+#include <vector>
 namespace arrow {
 
 class Array;
 class ArrayBuilder;
 class Buffer;
 struct DataType;
+struct Field;
 class MemoryPool;
 class Status;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 79d560e..d2bd297 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -21,7 +21,16 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/struct.h"
+#include "arrow/types/test-common.h"
+#include "arrow/util/status.h"
 
 using std::shared_ptr;
 using std::string;
@@ -52,4 +61,327 @@ TEST(TestStructType, Basics) {
   // TODO(wesm): out of bounds for field(...)
 }
 
+// Checks |result| against the fixture data shared by the TestStructBuilder cases:
+// a valid struct array of length 4 without null slots, whose field 0 is a list
+// array (4 slots, one of them null, 10 int8 child values) and whose field 1 is an
+// int32 array (4 slots, no nulls).
+// Only list_offsets, list_values and int_values are compared element by element;
+// struct_is_valid, list_is_valid and list_lengths are accepted but not inspected.
+void ValidateBasicStructArray(const StructArray* result,
+    const vector<uint8_t>& struct_is_valid, const vector<char>& list_values,
+    const vector<uint8_t>& list_is_valid, const vector<int>& list_lengths,
+    const vector<int>& list_offsets, const vector<int32_t>& int_values) {
+  ASSERT_EQ(4, result->length());
+  ASSERT_OK(result->Validate());
+
+  // Field 0 is the list column (its values are the int8 characters), field 1 the
+  // int32 column.
+  auto list_char_arr = static_cast<ListArray*>(result->field(0).get());
+  auto char_arr = static_cast<Int8Array*>(list_char_arr->values().get());
+  auto int32_arr = static_cast<Int32Array*>(result->field(1).get());
+
+  ASSERT_EQ(0, result->null_count());
+  ASSERT_EQ(1, list_char_arr->null_count());
+  ASSERT_EQ(0, int32_arr->null_count());
+
+  // List<char>
+  ASSERT_EQ(4, list_char_arr->length());
+  ASSERT_EQ(10, list_char_arr->values()->length());
+  for (size_t i = 0; i < list_offsets.size(); ++i) {
+    ASSERT_EQ(list_offsets[i], list_char_arr->offsets()[i]);
+  }
+  for (size_t i = 0; i < list_values.size(); ++i) {
+    ASSERT_EQ(list_values[i], char_arr->Value(i));
+  }
+
+  // Int32
+  ASSERT_EQ(4, int32_arr->length());
+  for (size_t i = 0; i < int_values.size(); ++i) {
+    ASSERT_EQ(int_values[i], int32_arr->Value(i));
+  }
+}
+
+// ----------------------------------------------------------------------------------
+// Struct test
+// Fixture that creates, through MakeBuilder, a StructBuilder for a struct type
+// with two fields: "list" (a list of int8) and "int" (int32). Test cases drive
+// builder_ and call Done() to obtain the finished array in result_.
+class TestStructBuilder : public TestBuilder {
+ public:
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    auto int32_type = TypePtr(new Int32Type());
+    auto char_type = TypePtr(new Int8Type());
+    auto list_type = TypePtr(new ListType(char_type));
+
+    std::vector<FieldPtr> fields;
+    fields.push_back(FieldPtr(new Field("list", list_type)));
+    fields.push_back(FieldPtr(new Field("int", int32_type)));
+
+    type_ = TypePtr(new StructType(fields));
+    value_fields_ = fields;
+
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+
+    // Fail the test, rather than dereference a null pointer, if MakeBuilder did
+    // not hand back a StructBuilder.
+    builder_ = std::dynamic_pointer_cast<StructBuilder>(tmp);
+    ASSERT_TRUE(builder_ != nullptr);
+    ASSERT_EQ(2, builder_->field_builders().size());
+  }
+
+  // Finishes builder_ and stores the outcome, downcast to StructArray, in result_.
+  void Done() { result_ = std::dynamic_pointer_cast<StructArray>(builder_->Finish()); }
+
+ protected:
+  // The two fields the struct type was created from.
+  std::vector<FieldPtr> value_fields_;
+  TypePtr type_;
+
+  std::shared_ptr<StructBuilder> builder_;
+  std::shared_ptr<StructArray> result_;
+};
+
+// Appends two null slots to the struct builder and to each child builder, then
+// checks that the finished array and both of its fields report two null slots.
+TEST_F(TestStructBuilder, TestAppendNull) {
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_EQ(2, builder_->field_builders().size());
+
+  // The struct builder does not touch its children, so each child builder gets its
+  // own two null appends.
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  ASSERT_OK(list_vb->AppendNull());
+  ASSERT_OK(list_vb->AppendNull());
+  ASSERT_EQ(2, list_vb->length());
+
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_OK(int_vb->AppendNull());
+  ASSERT_OK(int_vb->AppendNull());
+  ASSERT_EQ(2, int_vb->length());
+
+  Done();
+
+  ASSERT_OK(result_->Validate());
+
+  // Parent and children all have length 2, and every slot is null.
+  ASSERT_EQ(2, result_->fields().size());
+  ASSERT_EQ(2, result_->length());
+  ASSERT_EQ(2, result_->field(0)->length());
+  ASSERT_EQ(2, result_->field(1)->length());
+  ASSERT_TRUE(result_->IsNull(0));
+  ASSERT_TRUE(result_->IsNull(1));
+  ASSERT_TRUE(result_->field(0)->IsNull(0));
+  ASSERT_TRUE(result_->field(0)->IsNull(1));
+  ASSERT_TRUE(result_->field(1)->IsNull(0));
+  ASSERT_TRUE(result_->field(1)->IsNull(1));
+
+  // The child arrays keep the types of the fields the struct was declared with.
+  ASSERT_EQ(Type::LIST, result_->field(0)->type_enum());
+  ASSERT_EQ(Type::INT32, result_->field(1)->type_enum());
+}
+
+// Fills the struct slot by slot (one list and one int32 per slot), appends the
+// parent validity afterwards, and checks the finished array with
+// ValidateBasicStructArray.
+TEST_F(TestStructBuilder, TestBasics) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6, 10};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_EQ(2, builder_->field_builders().size());
+
+  // Request room for all values before the UnsafeAppend calls below.
+  EXPECT_OK(builder_->Resize(list_lengths.size()));
+  EXPECT_OK(char_vb->Resize(list_values.size()));
+  EXPECT_OK(int_vb->Resize(int_values.size()));
+
+  // pos walks through list_values; slot i consumes list_lengths[i] characters.
+  int pos = 0;
+  for (size_t i = 0; i < list_lengths.size(); ++i) {
+    ASSERT_OK(list_vb->Append(list_is_valid[i] > 0));
+    int_vb->UnsafeAppend(int_values[i]);
+    for (int j = 0; j < list_lengths[i]; ++j) {
+      char_vb->UnsafeAppend(list_values[pos++]);
+    }
+  }
+
+  // The parent's validity is appended separately from the child data.
+  for (size_t i = 0; i < struct_is_valid.size(); ++i) {
+    ASSERT_OK(builder_->Append(struct_is_valid[i] > 0));
+  }
+
+  Done();
+
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+// Same data as TestBasics, but the parent validity and the list offsets are
+// appended in bulk from the prepared vectors.
+TEST_F(TestStructBuilder, BulkAppend) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // StructBuilder::Append returns a Status; a failed append must fail the test.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  // NOTE(review): ListBuilder::Append is declared outside this patch; if it also
+  // returns Status, wrap this call in ASSERT_OK as well.
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+// Bulk-appends a parent validity vector that marks slot 1 as null although the
+// children carry data for it, and checks that Validate() still succeeds.
+TEST_F(TestStructBuilder, BulkAppendInvalid) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 0, 1, 1};  // should be 1, 1, 1, 1
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // StructBuilder::Append returns a Status; a failed append must fail the test.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  // NOTE(review): ListBuilder::Append is declared outside this patch; if it also
+  // returns Status, wrap this call in ASSERT_OK as well.
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  // The parent's null bitmap disagrees with the child data, but Validate() does not
+  // compare the two, so the array is still reported as valid.
+  ASSERT_OK(result_->Validate());
+}
+
+// Builds five arrays from the same builder (a reference array, an equal copy, and
+// three variants differing in the parent bitmap, the list offsets and the values)
+// and checks Equals() and RangeEquals() between them.
+TEST_F(TestStructBuilder, TestEquality) {
+  ArrayPtr array, equal_array;
+  ArrayPtr unequal_bitmap_array, unequal_offsets_array, unequal_values_array;
+
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  vector<int32_t> unequal_int_values = {4, 2, 3, 1};
+  vector<char> unequal_list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'l', 'u', 'c', 'y'};
+  vector<int> unequal_list_offsets = {0, 3, 4, 6};
+  vector<uint8_t> unequal_list_is_valid = {1, 1, 1, 1};
+  vector<uint8_t> unequal_struct_is_valid = {1, 0, 0, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // Set up two equal arrays. StructBuilder::Append returns a Status, so every bulk
+  // append on the parent is checked.
+  // NOTE(review): ListBuilder::Append is declared outside this patch; if it also
+  // returns Status, wrap the list_vb->Append calls in ASSERT_OK as well.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+  array = builder_->Finish();
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+  equal_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // Set up an unequal array that differs only in the parent's validity bitmap.
+  ASSERT_OK(
+      builder_->Append(unequal_struct_is_valid.size(), unequal_struct_is_valid.data()));
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+  unequal_bitmap_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // Set up an unequal array that differs in the list offsets and list validity.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  list_vb->Append(unequal_list_offsets.data(), unequal_list_offsets.size(),
+      unequal_list_is_valid.data());
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+  unequal_offsets_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // Set up an unequal array that differs in the list characters and int32 values.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+  list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data());
+  for (int8_t value : unequal_list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : unequal_int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+  unequal_values_array = builder_->Finish();
+
+  // Test array equality
+  EXPECT_TRUE(array->Equals(array));
+  EXPECT_TRUE(array->Equals(equal_array));
+  EXPECT_TRUE(equal_array->Equals(array));
+  EXPECT_FALSE(equal_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(equal_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_values_array));
+  EXPECT_FALSE(unequal_values_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_offsets_array));
+  EXPECT_FALSE(unequal_offsets_array->Equals(unequal_bitmap_array));
+
+  // Test range equality
+  EXPECT_TRUE(array->RangeEquals(0, 4, 0, equal_array));
+  EXPECT_TRUE(array->RangeEquals(3, 4, 3, unequal_bitmap_array));
+  EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 1, 0, unequal_values_array));
+  EXPECT_TRUE(array->RangeEquals(1, 3, 1, unequal_values_array));
+  EXPECT_FALSE(array->RangeEquals(3, 4, 3, unequal_values_array));
+}
+
+// Finishing a builder that never received an append must still validate.
+TEST_F(TestStructBuilder, TestZeroLength) {
+  // All buffers are null
+  Done();
+  ASSERT_OK(result_->Validate());
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index 04a277a..e8176f0 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -17,4 +17,74 @@
 
 #include "arrow/types/struct.h"
 
-namespace arrow {}  // namespace arrow
+#include <sstream>
+
+namespace arrow {
+
+// Returns true when |arr| is this very array, or a struct array with the same
+// length and null count whose slots all compare equal under RangeEquals.
+// A null |arr| compares unequal.
+bool StructArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // Compare lengths first: RangeEquals only walks this array's range, so a longer
+  // |arr| with a matching prefix would compare equal and a shorter one would be
+  // indexed past its end.
+  if (length_ != arr->length()) { return false; }
+  if (null_count_ != arr->null_count()) { return false; }
+  return RangeEquals(0, length_, 0, arr);
+}
+
+// Compares slots [start_idx, end_idx) of this array with the slots of |arr| that
+// start at other_start_idx. Two slots match when both are null, or when both are
+// non-null and every child field compares equal at that slot. Returns false when
+// |arr| is null, is not a struct array, or has a different number of fields.
+bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  // Comparing an array with itself is trivially true only when both ranges start
+  // at the same slot; shifted ranges must be compared slot by slot.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  if (!arr) { return false; }
+  if (Type::STRUCT != arr->type_enum()) { return false; }
+  const auto other = static_cast<const StructArray*>(arr.get());
+
+  // The per-field loop below indexes other->field(j) with this array's field
+  // count, so the two counts have to agree.
+  if (field_arrays_.size() != other->fields().size()) { return false; }
+
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    if (IsNull(i) != other->IsNull(o_i)) { return false; }
+    if (IsNull(i)) continue;
+    for (size_t j = 0; j < field_arrays_.size(); ++j) {
+      // TODO: really we should be comparing stretches of non-null data rather
+      // than looking at one value at a time.
+      if (!field(j)->RangeEquals(i, i + 1, o_i, other->field(j))) { return false; }
+    }
+  }
+
+  return true;
+}
+
+// Checks the structural invariants of the struct array and returns
+// Status::Invalid describing the first violation found, Status::OK() otherwise:
+//  - the length is not negative and the null count does not exceed it;
+//  - all child arrays have the same length as the first child;
+//  - every child array validates successfully;
+//  - when the children are non-empty, their length equals the struct's length.
+Status StructArray::Validate() const {
+  if (length_ < 0) { return Status::Invalid("Length was negative"); }
+
+  if (null_count() > length_) {
+    return Status::Invalid("Null count exceeds the length of this struct");
+  }
+
+  if (field_arrays_.size() > 0) {
+    // Validate fields
+    int32_t array_length = field_arrays_[0]->length();
+    size_t idx = 0;
+    // Bind by reference: copying the shared_ptr per iteration is unnecessary.
+    for (const auto& it : field_arrays_) {
+      if (it->length() != array_length) {
+        std::stringstream ss;
+        ss << "Length is not equal from field " << it->type()->ToString()
+           << " at position {" << idx << "}";
+        return Status::Invalid(ss.str());
+      }
+
+      const Status child_valid = it->Validate();
+      if (!child_valid.ok()) {
+        std::stringstream ss;
+        ss << "Child array invalid: " << child_valid.ToString() << " at position {" << idx
+           << "}";
+        return Status::Invalid(ss.str());
+      }
+      ++idx;
+    }
+
+    // NOTE(review): zero-length children are accepted for any struct length here;
+    // confirm that this is intended.
+    if (array_length > 0 && array_length != length_) {
+      return Status::Invalid("Struct's length is not equal to its child arrays");
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9ce13a06/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index 17e3299..78afd29 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -23,7 +23,102 @@
 #include <vector>
 
 #include "arrow/type.h"
+#include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
 
-namespace arrow {}  // namespace arrow
+namespace arrow {
+
+// Array of struct values. Length, null count and null bitmap are handed to the
+// Array base class; the data of each struct member is held in the child array at
+// the same position in field_arrays_.
+class StructArray : public Array {
+ public:
+  // |field_arrays| is only copied, so it is taken by const reference; callers may
+  // pass temporaries or const vectors.
+  StructArray(const TypePtr& type, int32_t length,
+      const std::vector<ArrayPtr>& field_arrays, int32_t null_count = 0,
+      std::shared_ptr<Buffer> null_bitmap = nullptr)
+      : Array(type, length, null_count, null_bitmap), field_arrays_(field_arrays) {
+    type_ = type;
+  }
+
+  Status Validate() const override;
+
+  virtual ~StructArray() {}
+
+  // Return a shared pointer in case the requestor desires to share ownership
+  // with this array.
+  // |pos| is not range-checked; the DCHECK only asserts that fields exist.
+  const std::shared_ptr<Array>& field(int32_t pos) const {
+    DCHECK_GT(field_arrays_.size(), 0);
+    return field_arrays_[pos];
+  }
+  // All child arrays, in field order.
+  const std::vector<ArrayPtr>& fields() const { return field_arrays_; }
+
+  bool EqualsExact(const StructArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+      const std::shared_ptr<Array>& arr) const override;
+
+ protected:
+  // The child arrays corresponding to each field of the struct data type.
+  std::vector<ArrayPtr> field_arrays_;
+};
+
+// ---------------------------------------------------------------------------------
+// StructArray builder
+// The Append, Resize and Reserve methods of StructBuilder act on the struct's own
+// validity data only. Callers must invoke the corresponding methods on every child
+// builder as well, so that parent and children stay consistent.
+class StructBuilder : public ArrayBuilder {
+ public:
+  // |field_builders| holds one builder per field of |type|, in field order.
+  StructBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+      const std::vector<std::shared_ptr<ArrayBuilder>>& field_builders)
+      : ArrayBuilder(pool, type), field_builders_(field_builders) {}
+
+  // Appends |length| slots whose validity is read from |valid_bytes|. The null
+  // bitmap has the same length as every child field, and any zero byte marks the
+  // slot as null. The child data itself must be inserted independently through
+  // the append or advance methods of the child builders.
+  Status Append(int32_t length, const uint8_t* valid_bytes) {
+    RETURN_NOT_OK(Reserve(length));
+    UnsafeAppendToBitmap(valid_bytes, length);
+    return Status::OK();
+  }
+
+  // Finishes every child builder, wraps the results in a StructArray together
+  // with this builder's length, null count and null bitmap, and resets this
+  // builder's own state.
+  std::shared_ptr<Array> Finish() override {
+    std::vector<ArrayPtr> fields;
+    fields.reserve(field_builders_.size());
+    for (const auto& it : field_builders_) {
+      fields.push_back(it->Finish());
+    }
+
+    auto result =
+        std::make_shared<StructArray>(type_, length_, fields, null_count_, null_bitmap_);
+
+    null_bitmap_ = nullptr;
+    capacity_ = length_ = null_count_ = 0;
+
+    return result;
+  }
+
+  // Append an element to the Struct. All child-builders' Append method must
+  // be called independently to maintain data-structure consistency.
+  Status Append(bool is_valid = true) {
+    RETURN_NOT_OK(Reserve(1));
+    UnsafeAppendToBitmap(is_valid);
+    return Status::OK();
+  }
+
+  Status AppendNull() { return Append(false); }
+
+  // Builder of the field at |pos|. |pos| is not range-checked; the DCHECK only
+  // asserts that field builders exist.
+  const std::shared_ptr<ArrayBuilder> field_builder(int pos) const {
+    DCHECK_GT(field_builders_.size(), 0);
+    return field_builders_[pos];
+  }
+  const std::vector<std::shared_ptr<ArrayBuilder>>& field_builders() const {
+    return field_builders_;
+  }
+
+ protected:
+  // One builder per struct field, in field order.
+  std::vector<std::shared_ptr<ArrayBuilder>> field_builders_;
+};
+
+}  // namespace arrow
 
 #endif  // ARROW_TYPES_STRUCT_H


Mime
View raw message