arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-107: [C++] Implement IPC for structs
Date Tue, 16 Aug 2016 06:04:55 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 268e108c2 -> 246a126b2


ARROW-107: [C++] Implement IPC for structs

Some other changes (I tried to isolate each in there own commit):
1.  Changed NumericTypes to be its own tempated type instead of separate macros (this made
debugging easier)
2.  Fix an existing unit test for IPC that row counts inconsistent with row batch size.
3.  Some minor make-format changes.

Author: Micah Kornfield <emkornfield@gmail.com>

Closes #117 from emkornfield/emk_struct_ipc and squashes the following commits:

777e338 [Micah Kornfield] fix formatting
9008046 [Micah Kornfield] use TypeClass::c_type
e46b0d8 [Micah Kornfield] add skip for memory pool test
fc63bff [Micah Kornfield] make lint and make format
9aa972b [Micah Kornfield] change macro to templates instead (makes debugging easier)
3e01e7f [Micah Kornfield] Implement struct round-trip.  Fix unit test for non null to have
consistent batch sizes
8eaf1e7 [Micah Kornfield] fix formatting


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/246a126b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/246a126b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/246a126b

Branch: refs/heads/master
Commit: 246a126b23dc20bca7b665ec76d75ca4a68cd1f1
Parents: 268e108
Author: Micah Kornfield <emkornfield@gmail.com>
Authored: Mon Aug 15 23:04:46 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Mon Aug 15 23:04:46 2016 -0700

----------------------------------------------------------------------
 cpp/src/.clang-tidy-ignore             |  1 +
 cpp/src/arrow/ipc/adapter.cc           | 24 ++++++-
 cpp/src/arrow/ipc/ipc-adapter-test.cc  | 46 +++++++++++---
 cpp/src/arrow/ipc/metadata-internal.cc | 10 +--
 cpp/src/arrow/types/primitive.h        | 97 +++++++++++++++--------------
 cpp/src/arrow/util/memory-pool-test.cc |  2 +-
 6 files changed, 115 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/.clang-tidy-ignore
----------------------------------------------------------------------
diff --git a/cpp/src/.clang-tidy-ignore b/cpp/src/.clang-tidy-ignore
index a128c38..5ab4d20 100644
--- a/cpp/src/.clang-tidy-ignore
+++ b/cpp/src/.clang-tidy-ignore
@@ -1 +1,2 @@
 ipc-adapter-test.cc
+memory-pool-test.cc

http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index 84f7830..3259980 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -34,6 +34,7 @@
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
+#include "arrow/types/struct.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/status.h"
@@ -118,8 +119,11 @@ Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>*
field_nodes
     RETURN_NOT_OK(VisitArray(
         list_arr->values().get(), field_nodes, buffers, max_recursion_depth - 1));
   } else if (arr->type_enum() == Type::STRUCT) {
-    // TODO(wesm)
-    return Status::NotImplemented("Struct type");
+    const auto struct_arr = static_cast<const StructArray*>(arr);
+    for (auto& field : struct_arr->fields()) {
+      RETURN_NOT_OK(
+          VisitArray(field.get(), field_nodes, buffers, max_recursion_depth - 1));
+    }
   } else {
     return Status::NotImplemented("Unrecognized type");
   }
@@ -313,6 +317,22 @@ class RowBatchReader::Impl {
       return MakeListArray(type, field_meta.length, offsets, values_array,
           field_meta.null_count, null_bitmap, out);
     }
+
+    if (type->type == Type::STRUCT) {
+      const int num_children = type->num_children();
+      std::vector<ArrayPtr> fields;
+      fields.reserve(num_children);
+      for (int child_idx = 0; child_idx < num_children; ++child_idx) {
+        std::shared_ptr<Array> field_array;
+        RETURN_NOT_OK(NextArray(
+            type->child(child_idx).get(), max_recursion_depth - 1, &field_array));
+        fields.push_back(field_array);
+      }
+      out->reset(new StructArray(
+          type, field_meta.length, fields, field_meta.null_count, null_bitmap));
+      return Status::OK();
+    }
+
     return Status::NotImplemented("Non-primitive types not complete yet");
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
index 2bfb459..6740e0f 100644
--- a/cpp/src/arrow/ipc/ipc-adapter-test.cc
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -32,6 +32,7 @@
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
+#include "arrow/types/struct.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/memory-pool.h"
@@ -205,15 +206,16 @@ Status MakeNonNullRowBatch(std::shared_ptr<RowBatch>* out) {
 
   // Example data
   MemoryPool* pool = default_memory_pool();
-  const int length = 200;
+  const int length = 50;
   std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array;
 
   RETURN_NOT_OK(MakeRandomInt32Array(1000, true, pool, &leaf_values));
   bool include_nulls = false;
-  RETURN_NOT_OK(MakeRandomListArray(leaf_values, 50, include_nulls, pool, &list_array));
   RETURN_NOT_OK(
-      MakeRandomListArray(list_array, 50, include_nulls, pool, &list_list_array));
-  RETURN_NOT_OK(MakeRandomInt32Array(0, include_nulls, pool, &flat_array));
+      MakeRandomListArray(leaf_values, length, include_nulls, pool, &list_array));
+  RETURN_NOT_OK(
+      MakeRandomListArray(list_array, length, include_nulls, pool, &list_list_array));
+  RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array));
   out->reset(new RowBatch(schema, length, {list_array, list_list_array, flat_array}));
   return Status::OK();
 }
@@ -238,10 +240,40 @@ Status MakeDeeplyNestedList(std::shared_ptr<RowBatch>* out) {
   return Status::OK();
 }
 
-INSTANTIATE_TEST_CASE_P(
-    RoundTripTests, TestWriteRowBatch,
+Status MakeStruct(std::shared_ptr<RowBatch>* out) {
+  // reuse constructed list columns
+  std::shared_ptr<RowBatch> list_batch;
+  RETURN_NOT_OK(MakeListRowBatch(&list_batch));
+  std::vector<ArrayPtr> columns = {
+      list_batch->column(0), list_batch->column(1), list_batch->column(2)};
+  auto list_schema = list_batch->schema();
+
+  // Define schema
+  std::shared_ptr<DataType> type(new StructType(
+      {list_schema->field(0), list_schema->field(1), list_schema->field(2)}));
+  auto f0 = std::make_shared<Field>("non_null_struct", type);
+  auto f1 = std::make_shared<Field>("null_struct", type);
+  std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+
+  // construct individual nullable/non-nullable struct arrays
+  ArrayPtr no_nulls(new StructArray(type, list_batch->num_rows(), columns));
+  std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
+  null_bytes[0] = 0;
+  std::shared_ptr<Buffer> null_bitmask;
+  RETURN_NOT_OK(util::bytes_to_bits(null_bytes, &null_bitmask));
+  ArrayPtr with_nulls(
+      new StructArray(type, list_batch->num_rows(), columns, 1, null_bitmask));
+
+  // construct batch
+  std::vector<ArrayPtr> arrays = {no_nulls, with_nulls};
+  out->reset(new RowBatch(schema, list_batch->num_rows(), arrays));
+  return Status::OK();
+}
+
+INSTANTIATE_TEST_CASE_P(RoundTripTests, TestWriteRowBatch,
     ::testing::Values(&MakeIntRowBatch, &MakeListRowBatch, &MakeNonNullRowBatch,
-        &MakeZeroLengthRowBatch, &MakeDeeplyNestedList, &MakeStringTypesRowBatch));
+                            &MakeZeroLengthRowBatch, &MakeDeeplyNestedList,
+                            &MakeStringTypesRowBatch, &MakeStruct));
 
 void TestGetRowBatchSize(std::shared_ptr<RowBatch> batch) {
   MockMemorySource mock_source(1 << 16);

http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 1d3edf0..8cd416f 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -265,11 +265,8 @@ Status MessageBuilder::SetSchema(const Schema* schema) {
     field_offsets.push_back(offset);
   }
 
-  header_ = flatbuf::CreateSchema(
-                fbb_,
-                endianness(),
-                fbb_.CreateVector(field_offsets))
-                .Union();
+  header_ =
+      flatbuf::CreateSchema(fbb_, endianness(), fbb_.CreateVector(field_offsets)).Union();
   body_length_ = 0;
   return Status::OK();
 }
@@ -278,8 +275,7 @@ Status MessageBuilder::SetRecordBatch(int32_t length, int64_t body_length,
     const std::vector<flatbuf::FieldNode>& nodes,
     const std::vector<flatbuf::Buffer>& buffers) {
   header_type_ = flatbuf::MessageHeader_RecordBatch;
-  header_ = flatbuf::CreateRecordBatch(fbb_, length,
-                fbb_.CreateVectorOfStructs(nodes),
+  header_ = flatbuf::CreateRecordBatch(fbb_, length, fbb_.CreateVectorOfStructs(nodes),
                 fbb_.CreateVectorOfStructs(buffers))
                 .Union();
   body_length_ = body_length;

http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 18f954a..770de76 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -53,54 +53,55 @@ class ARROW_EXPORT PrimitiveArray : public Array {
   const uint8_t* raw_data_;
 };
 
-#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T)                                         \
-  class ARROW_EXPORT NAME : public PrimitiveArray {                                    \
-   public:                                                                             \
-    using value_type = T;                                                              \
-                                                                                       \
-    NAME(int32_t length, const std::shared_ptr<Buffer>& data, int32_t null_count
= 0,  \
-        const std::shared_ptr<Buffer>& null_bitmap = nullptr)                 
        \
-        : PrimitiveArray(                                                              \
-              std::make_shared<TypeClass>(), length, data, null_count, null_bitmap)
{} \
-    NAME(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>&
data,     \
-        int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr)
 \
-        : PrimitiveArray(type, length, data, null_count, null_bitmap) {}               \
-                                                                                       \
-    bool EqualsExact(const NAME& other) const {                                     
  \
-      return PrimitiveArray::EqualsExact(*static_cast<const PrimitiveArray*>(&other));
\
-    }                                                                                  \
-                                                                                       \
-    bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,      \
-        const ArrayPtr& arr) const override {                                       
  \
-      if (this == arr.get()) { return true; }                                          \
-      if (!arr) { return false; }                                                      \
-      if (this->type_enum() != arr->type_enum()) { return false; }                
    \
-      const auto other = static_cast<NAME*>(arr.get());                           
    \
-      for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {  
 \
-        const bool is_null = IsNull(i);                                                \
-        if (is_null != arr->IsNull(o_i) ||                                           
 \
-            (!is_null && Value(i) != other->Value(o_i))) {                   
         \
-          return false;                                                                \
-        }                                                                              \
-      }                                                                                \
-      return true;                                                                     \
-    }                                                                                  \
-                                                                                       \
-    const T* raw_data() const { return reinterpret_cast<const T*>(raw_data_); }   
    \
-                                                                                       \
-    T Value(int i) const { return raw_data()[i]; }                                     \
-  };
-
-NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type, uint8_t);
-NUMERIC_ARRAY_DECL(Int8Array, Int8Type, int8_t);
-NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type, uint16_t);
-NUMERIC_ARRAY_DECL(Int16Array, Int16Type, int16_t);
-NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type, uint32_t);
-NUMERIC_ARRAY_DECL(Int32Array, Int32Type, int32_t);
-NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type, uint64_t);
-NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
-NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
-NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
+template <class TypeClass>
+class ARROW_EXPORT NumericArray : public PrimitiveArray {
+ public:
+  using value_type = typename TypeClass::c_type;
+  NumericArray(int32_t length, const std::shared_ptr<Buffer>& data,
+      int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr)
+      : PrimitiveArray(
+            std::make_shared<TypeClass>(), length, data, null_count, null_bitmap) {}
+  NumericArray(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>&
data,
+      int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr)
+      : PrimitiveArray(type, length, data, null_count, null_bitmap) {}
+
+  bool EqualsExact(const NumericArray<TypeClass>& other) const {
+    return PrimitiveArray::EqualsExact(*static_cast<const PrimitiveArray*>(&other));
+  }
+
+  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+      const ArrayPtr& arr) const override {
+    if (this == arr.get()) { return true; }
+    if (!arr) { return false; }
+    if (this->type_enum() != arr->type_enum()) { return false; }
+    const auto other = static_cast<NumericArray<TypeClass>*>(arr.get());
+    for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+      const bool is_null = IsNull(i);
+      if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i)))
{
+        return false;
+      }
+    }
+    return true;
+  }
+  const value_type* raw_data() const {
+    return reinterpret_cast<const value_type*>(raw_data_);
+  }
+
+  value_type Value(int i) const { return raw_data()[i]; }
+};
+
+#define NUMERIC_ARRAY_DECL(NAME, TypeClass) using NAME = NumericArray<TypeClass>;
+
+NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type);
+NUMERIC_ARRAY_DECL(Int8Array, Int8Type);
+NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type);
+NUMERIC_ARRAY_DECL(Int16Array, Int16Type);
+NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type);
+NUMERIC_ARRAY_DECL(Int32Array, Int32Type);
+NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type);
+NUMERIC_ARRAY_DECL(Int64Array, Int64Type);
+NUMERIC_ARRAY_DECL(FloatArray, FloatType);
+NUMERIC_ARRAY_DECL(DoubleArray, DoubleType);
 
 template <typename Type>
 class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {

http://git-wip-us.apache.org/repos/asf/arrow/blob/246a126b/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
index 919f374..deb7ffd 100644
--- a/cpp/src/arrow/util/memory-pool-test.cc
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -54,7 +54,7 @@ TEST(DefaultMemoryPoolDeathTest, FreeLargeMemory) {
 
 #ifndef NDEBUG
   EXPECT_EXIT(pool->Free(data, 120), ::testing::ExitedWithCode(1),
-               ".*Check failed: \\(bytes_allocated_\\) >= \\(size\\)");
+      ".*Check failed: \\(bytes_allocated_\\) >= \\(size\\)");
 #endif
 
   pool->Free(data, 100);


Mime
View raw message