arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [1/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps
Date Wed, 23 Mar 2016 01:45:22 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 093f9bd8c -> 65db0da80


http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.cc b/cpp/src/arrow/types/integer.cc
deleted file mode 100644
index 4696536..0000000
--- a/cpp/src/arrow/types/integer.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/integer.h"
-
-namespace arrow {
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.h b/cpp/src/arrow/types/integer.h
deleted file mode 100644
index 5684191..0000000
--- a/cpp/src/arrow/types/integer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_INTEGER_H
-#define ARROW_TYPES_INTEGER_H
-
-#include <cstdint>
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-// Array containers
-
-typedef PrimitiveArrayImpl<UInt8Type> UInt8Array;
-typedef PrimitiveArrayImpl<Int8Type> Int8Array;
-
-typedef PrimitiveArrayImpl<UInt16Type> UInt16Array;
-typedef PrimitiveArrayImpl<Int16Type> Int16Array;
-
-typedef PrimitiveArrayImpl<UInt32Type> UInt32Array;
-typedef PrimitiveArrayImpl<Int32Type> Int32Array;
-
-typedef PrimitiveArrayImpl<UInt64Type> UInt64Array;
-typedef PrimitiveArrayImpl<Int64Type> Int64Array;
-
-// Builders
-
-typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
-typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
-typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
-typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
-
-typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
-typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
-typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
-typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_INTEGER_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
index 168e370..fb731ed 100644
--- a/cpp/src/arrow/types/json.cc
+++ b/cpp/src/arrow/types/json.cc
@@ -20,7 +20,6 @@
 #include <vector>
 
 #include "arrow/type.h"
-#include "arrow/types/string.h"
 #include "arrow/types/union.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h
index b67fb38..9c850af 100644
--- a/cpp/src/arrow/types/json.h
+++ b/cpp/src/arrow/types/json.h
@@ -28,8 +28,8 @@ struct JSONScalar : public DataType {
   static TypePtr dense_type;
   static TypePtr sparse_type;
 
-  explicit JSONScalar(bool dense = true, bool nullable = true)
-      : DataType(LogicalType::JSON_SCALAR, nullable),
+  explicit JSONScalar(bool dense = true)
+      : DataType(Type::JSON_SCALAR),
         dense(dense) {}
 };
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 02991de..eb55ca8 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -15,20 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdlib>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/array.h"
+#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/list.h"
-#include "arrow/types/string.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
 #include "arrow/util/status.h"
 
@@ -39,27 +40,24 @@ using std::vector;
 
 namespace arrow {
 
-class ArrayBuilder;
-
 TEST(TypesTest, TestListType) {
   std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
 
   ListType list_type(vt);
-  ASSERT_EQ(list_type.type, LogicalType::LIST);
+  ASSERT_EQ(list_type.type, Type::LIST);
 
   ASSERT_EQ(list_type.name(), string("list"));
-  ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
+  ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
 
-  ASSERT_EQ(list_type.value_type->type, vt->type);
-  ASSERT_EQ(list_type.value_type->type, vt->type);
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
 
-  std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
-  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
-  ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));
+  std::shared_ptr<DataType> st = std::make_shared<StringType>();
+  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+  ASSERT_EQ(lt->ToString(), string("list<item: string>"));
 
-  ListType lt2(lt, false);
-  ASSERT_EQ(lt2.ToString(),
-      string("list<list<string not null> not null> not null"));
+  ListType lt2(lt);
+  ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
 }
 
 // ----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 69a79a7..670ee4d 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -19,4 +19,33 @@
 
 namespace arrow {
 
+bool ListArray::EqualsExact(const ListArray& other) const {
+  if (this == &other) return true;
+  if (null_count_ != other.null_count_) {
+    return false;
+  }
+
+  bool equal_offsets = offset_buf_->Equals(*other.offset_buf_,
+      length_ + 1);
+  bool equal_nulls = true;
+  if (null_count_ > 0) {
+    equal_nulls = nulls_->Equals(*other.nulls_,
+        util::bytes_for_bits(length_));
+  }
+
+  if (!(equal_offsets && equal_nulls)) {
+    return false;
+  }
+
+  return values()->Equals(other.values());
+}
+
+bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  if (this->type_enum() != arr->type_enum()) {
+    return false;
+  }
+  return EqualsExact(*static_cast<const ListArray*>(arr.get()));
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 210c76a..141f762 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -21,12 +21,10 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
-#include <string>
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -38,29 +36,19 @@ class MemoryPool;
 
 class ListArray : public Array {
  public:
-  ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
-
   ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      std::shared_ptr<Buffer> nulls = nullptr) {
-    Init(type, length, offsets, values, null_count, nulls);
-  }
-
-  virtual ~ListArray() {}
-
-  void Init(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
-      const ArrayPtr& values,
-      int32_t null_count = 0,
-      std::shared_ptr<Buffer> nulls = nullptr) {
+      std::shared_ptr<Buffer> nulls = nullptr) :
+      Array(type, length, null_count, nulls) {
     offset_buf_ = offsets;
     offsets_ = offsets == nullptr? nullptr :
       reinterpret_cast<const int32_t*>(offset_buf_->data());
-
     values_ = values;
-    Array::Init(type, length, null_count, nulls);
   }
 
+  virtual ~ListArray() {}
+
   // Return a shared pointer in case the requestor desires to share ownership
   // with this array.
   const std::shared_ptr<Array>& values() const {return values_;}
@@ -77,6 +65,9 @@ class ListArray : public Array {
   int32_t value_offset(int i) { return offsets_[i];}
   int32_t value_length(int i) { return offsets_[i + 1] - offsets_[i];}
 
+  bool EqualsExact(const ListArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+
  protected:
   std::shared_ptr<Buffer> offset_buf_;
   const int32_t* offsets_;
@@ -137,8 +128,6 @@ class ListBuilder : public Int32Builder {
 
   template <typename Container>
   std::shared_ptr<Array> Transfer() {
-    auto result = std::make_shared<Container>();
-
     std::shared_ptr<Array> items = value_builder_->Finish();
 
     // Add final offset if the length is non-zero
@@ -146,8 +135,9 @@ class ListBuilder : public Int32Builder {
       raw_buffer()[length_] = items->length();
     }
 
-    result->Init(type_, length_, values_, items,
+    auto result = std::make_shared<Container>(type_, length_, values_, items,
         null_count_, nulls_);
+
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index f35a258..7eae8cd 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -15,21 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
-#include "arrow/array.h"
+#include "gtest/gtest.h"
+
 #include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/boolean.h"
 #include "arrow/types/construct.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
 #include "arrow/util/bit-util.h"
@@ -43,23 +39,17 @@ using std::vector;
 
 namespace arrow {
 
-TEST(TypesTest, TestBytesType) {
-  BytesType t1(3);
-
-  ASSERT_EQ(t1.type, LayoutEnum::BYTE);
-  ASSERT_EQ(t1.size, 3);
-}
-
+class Array;
 
 #define PRIMITIVE_TEST(KLASS, ENUM, NAME)       \
   TEST(TypesTest, TestPrimitive_##ENUM) {       \
     KLASS tp;                                   \
                                                 \
-    ASSERT_EQ(tp.type, LogicalType::ENUM);      \
+    ASSERT_EQ(tp.type, Type::ENUM);             \
     ASSERT_EQ(tp.name(), string(NAME));         \
                                                 \
     KLASS tp_copy = tp;                         \
-    ASSERT_EQ(tp_copy.type, LogicalType::ENUM); \
+    ASSERT_EQ(tp_copy.type, Type::ENUM);        \
   }
 
 PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -109,22 +99,20 @@ class TestPrimitiveBuilder : public TestBuilder {
 
   void RandomData(int N, double pct_null = 0.1) {
     Attrs::draw(N, &draws_);
-    random_nulls(N, pct_null, &nulls_);
+    test::random_nulls(N, pct_null, &nulls_);
   }
 
   void CheckNullable() {
-    ArrayType expected;
     int size = builder_->length();
 
     auto ex_data = std::make_shared<Buffer>(
         reinterpret_cast<uint8_t*>(draws_.data()),
         size * sizeof(T));
 
-    auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size);
-
-    int32_t ex_null_count = null_count(nulls_);
+    auto ex_nulls = test::bytes_to_null_buffer(nulls_.data(), size);
+    int32_t ex_null_count = test::null_count(nulls_);
 
-    expected.Init(size, ex_data, ex_null_count, ex_nulls);
+    auto expected = std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_nulls);
 
     std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
         builder_->Finish());
@@ -135,18 +123,17 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(0, builder_->null_count());
     ASSERT_EQ(nullptr, builder_->buffer());
 
-    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_TRUE(result->EqualsExact(*expected.get()));
     ASSERT_EQ(ex_null_count, result->null_count());
   }
 
   void CheckNonNullable() {
-    ArrayType expected;
     int size = builder_nn_->length();
 
     auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
         size * sizeof(T));
 
-    expected.Init(size, ex_data);
+    auto expected = std::make_shared<ArrayType>(size, ex_data);
 
     std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
         builder_nn_->Finish());
@@ -156,7 +143,7 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(0, builder_nn_->capacity());
     ASSERT_EQ(nullptr, builder_nn_->buffer());
 
-    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_TRUE(result->EqualsExact(*expected.get()));
     ASSERT_EQ(0, result->null_count());
   }
 
@@ -183,8 +170,8 @@ class TestPrimitiveBuilder : public TestBuilder {
 #define PINT_DECL(CapType, c_type, LOWER, UPPER)    \
   struct P##CapType {                               \
     PTYPE_DECL(CapType, c_type);                    \
-    static void draw(int N, vector<T>* draws) {  \
-      randint<T>(N, LOWER, UPPER, draws);           \
+    static void draw(int N, vector<T>* draws) {     \
+      test::randint<T>(N, LOWER, UPPER, draws);     \
     }                                               \
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index c86260b..32b8bfa 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -26,16 +26,16 @@ namespace arrow {
 // ----------------------------------------------------------------------
 // Primitive array base
 
-void PrimitiveArray::Init(const TypePtr& type, int32_t length,
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
     const std::shared_ptr<Buffer>& data,
     int32_t null_count,
-    const std::shared_ptr<Buffer>& nulls) {
-  Array::Init(type, length, null_count, nulls);
+    const std::shared_ptr<Buffer>& nulls) :
+    Array(type, length, null_count, nulls) {
   data_ = data;
   raw_data_ = data == nullptr? nullptr : data_->data();
 }
 
-bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
+bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
   if (this == &other) return true;
   if (null_count_ != other.null_count_) {
     return false;
@@ -50,4 +50,12 @@ bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
   }
 }
 
+bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  if (this->type_enum() != arr->type_enum()) {
+    return false;
+  }
+  return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 22ab59c..e01027c 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -21,7 +21,6 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
-#include <string>
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
@@ -38,64 +37,57 @@ class MemoryPool;
 // Base class for fixed-size logical types
 class PrimitiveArray : public Array {
  public:
-  PrimitiveArray() : Array(), data_(nullptr), raw_data_(nullptr) {}
-
-  virtual ~PrimitiveArray() {}
-
-  void Init(const TypePtr& type, int32_t length,
+  PrimitiveArray(const TypePtr& type, int32_t length,
       const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0,
       const std::shared_ptr<Buffer>& nulls = nullptr);
+  virtual ~PrimitiveArray() {}
 
   const std::shared_ptr<Buffer>& data() const { return data_;}
 
-  bool Equals(const PrimitiveArray& other) const;
+  bool EqualsExact(const PrimitiveArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
 
  protected:
   std::shared_ptr<Buffer> data_;
   const uint8_t* raw_data_;
 };
 
-
-template <typename TypeClass>
-class PrimitiveArrayImpl : public PrimitiveArray {
- public:
-  typedef typename TypeClass::c_type value_type;
-
-  PrimitiveArrayImpl() : PrimitiveArray() {}
-
-  virtual ~PrimitiveArrayImpl() {}
-
-  PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    Init(length, data, null_count, nulls);
-  }
-
-  void Init(int32_t length, const std::shared_ptr<Buffer>& data,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    TypePtr type(new TypeClass());
-    PrimitiveArray::Init(type, length, data, null_count, nulls);
-  }
-
-  bool Equals(const PrimitiveArrayImpl& other) const {
-    return PrimitiveArray::Equals(*static_cast<const PrimitiveArray*>(&other));
-  }
-
-  const value_type* raw_data() const {
-    return reinterpret_cast<const value_type*>(raw_data_);
-  }
-
-  value_type Value(int i) const {
-    return raw_data()[i];
-  }
-
-  TypeClass* exact_type() const {
-    return static_cast<TypeClass*>(type_);
-  }
+#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T)                      \
+class NAME : public PrimitiveArray {                                \
+ public:                                                            \
+  using value_type = T;                                             \
+  using PrimitiveArray::PrimitiveArray;                             \
+  NAME(int32_t length, const std::shared_ptr<Buffer>& data,         \
+      int32_t null_count = 0,                                       \
+      const std::shared_ptr<Buffer>& nulls = nullptr) :             \
+      PrimitiveArray(std::make_shared<TypeClass>(), length, data,   \
+          null_count, nulls) {}                                     \
+                                                                    \
+  bool EqualsExact(const NAME& other) const {                       \
+    return PrimitiveArray::EqualsExact(                             \
+        *static_cast<const PrimitiveArray*>(&other));               \
+  }                                                                 \
+                                                                    \
+  const T* raw_data() const {                                       \
+    return reinterpret_cast<const T*>(raw_data_);                   \
+  }                                                                 \
+                                                                    \
+  T Value(int i) const {                                            \
+    return raw_data()[i];                                           \
+  }                                                                 \
 };
 
+NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type, uint8_t);
+NUMERIC_ARRAY_DECL(Int8Array, Int8Type, int8_t);
+NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type, uint16_t);
+NUMERIC_ARRAY_DECL(Int16Array, Int16Type, int16_t);
+NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type, uint32_t);
+NUMERIC_ARRAY_DECL(Int32Array, Int32Type, int32_t);
+NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type, uint64_t);
+NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
+NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
+NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
 
 template <typename Type, typename ArrayType>
 class PrimitiveBuilder : public ArrayBuilder {
@@ -202,8 +194,9 @@ class PrimitiveBuilder : public ArrayBuilder {
   }
 
   std::shared_ptr<Array> Finish() override {
-    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>();
-    result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_);
+    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>(
+        type_, length_, values_, null_count_, nulls_);
+
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
     return result;
@@ -222,6 +215,21 @@ class PrimitiveBuilder : public ArrayBuilder {
   int elsize_;
 };
 
+// Builders
+
+typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
+typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
+typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
+typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
+
+typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
+typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
+typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
+typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
+
+typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
+typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
+
 } // namespace arrow
 
 #endif  // ARROW_TYPES_PRIMITIVE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6381093..7dc3d68 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -15,21 +15,20 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
+#include <cstdlib>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/array.h"
-#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
 #include "arrow/types/test-common.h"
-#include "arrow/util/status.h"
 
 namespace arrow {
 
@@ -38,14 +37,14 @@ class Buffer;
 TEST(TypesTest, TestCharType) {
   CharType t1(5);
 
-  ASSERT_EQ(t1.type, LogicalType::CHAR);
+  ASSERT_EQ(t1.type, Type::CHAR);
   ASSERT_EQ(t1.size, 5);
 
   ASSERT_EQ(t1.ToString(), std::string("char(5)"));
 
   // Test copy constructor
   CharType t2 = t1;
-  ASSERT_EQ(t2.type, LogicalType::CHAR);
+  ASSERT_EQ(t2.type, Type::CHAR);
   ASSERT_EQ(t2.size, 5);
 }
 
@@ -53,22 +52,20 @@ TEST(TypesTest, TestCharType) {
 TEST(TypesTest, TestVarcharType) {
   VarcharType t1(5);
 
-  ASSERT_EQ(t1.type, LogicalType::VARCHAR);
+  ASSERT_EQ(t1.type, Type::VARCHAR);
   ASSERT_EQ(t1.size, 5);
-  ASSERT_EQ(t1.physical_type.size, 6);
 
   ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));
 
   // Test copy constructor
   VarcharType t2 = t1;
-  ASSERT_EQ(t2.type, LogicalType::VARCHAR);
+  ASSERT_EQ(t2.type, Type::VARCHAR);
   ASSERT_EQ(t2.size, 5);
-  ASSERT_EQ(t2.physical_type.size, 6);
 }
 
 TEST(TypesTest, TestStringType) {
   StringType str;
-  ASSERT_EQ(str.type, LogicalType::STRING);
+  ASSERT_EQ(str.type, Type::STRING);
   ASSERT_EQ(str.name(), std::string("string"));
 }
 
@@ -90,15 +87,16 @@ class TestStringContainer : public ::testing::Test  {
     length_ = offsets_.size() - 1;
     int nchars = chars_.size();
 
-    value_buf_ = to_buffer(chars_);
+    value_buf_ = test::to_buffer(chars_);
     values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
 
-    offsets_buf_ = to_buffer(offsets_);
+    offsets_buf_ = test::to_buffer(offsets_);
 
-    nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size());
-    null_count_ = null_count(nulls_);
+    nulls_buf_ = test::bytes_to_null_buffer(nulls_.data(), nulls_.size());
+    null_count_ = test::null_count(nulls_);
 
-    strings_.Init(length_, offsets_buf_, values_, null_count_, nulls_buf_);
+    strings_ = std::make_shared<StringArray>(length_, offsets_buf_, values_,
+        null_count_, nulls_buf_);
   }
 
  protected:
@@ -116,28 +114,28 @@ class TestStringContainer : public ::testing::Test  {
   int length_;
 
   ArrayPtr values_;
-  StringArray strings_;
+  std::shared_ptr<StringArray> strings_;
 };
 
 
 TEST_F(TestStringContainer, TestArrayBasics) {
-  ASSERT_EQ(length_, strings_.length());
-  ASSERT_EQ(1, strings_.null_count());
+  ASSERT_EQ(length_, strings_->length());
+  ASSERT_EQ(1, strings_->null_count());
 }
 
 TEST_F(TestStringContainer, TestType) {
-  TypePtr type = strings_.type();
+  TypePtr type = strings_->type();
 
-  ASSERT_EQ(LogicalType::STRING, type->type);
-  ASSERT_EQ(LogicalType::STRING, strings_.logical_type());
+  ASSERT_EQ(Type::STRING, type->type);
+  ASSERT_EQ(Type::STRING, strings_->type_enum());
 }
 
 
 TEST_F(TestStringContainer, TestListFunctions) {
   int pos = 0;
   for (size_t i = 0; i < expected_.size(); ++i) {
-    ASSERT_EQ(pos, strings_.value_offset(i));
-    ASSERT_EQ(expected_[i].size(), strings_.value_length(i));
+    ASSERT_EQ(pos, strings_->value_offset(i));
+    ASSERT_EQ(expected_[i].size(), strings_->value_length(i));
     pos += expected_[i].size();
   }
 }
@@ -151,9 +149,9 @@ TEST_F(TestStringContainer, TestDestructor) {
 TEST_F(TestStringContainer, TestGetString) {
   for (size_t i = 0; i < expected_.size(); ++i) {
     if (nulls_[i]) {
-      ASSERT_TRUE(strings_.IsNull(i));
+      ASSERT_TRUE(strings_->IsNull(i));
     } else {
-      ASSERT_EQ(expected_[i], strings_.GetString(i));
+      ASSERT_EQ(expected_[i], strings_->GetString(i));
     }
   }
 }
@@ -199,7 +197,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
   Done();
 
   ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps * null_count(is_null), result_->null_count());
+  ASSERT_EQ(reps * test::null_count(is_null), result_->null_count());
   ASSERT_EQ(reps * 6, result_->values()->length());
 
   int32_t length;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 8ccc0a9..2b3fba5 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -25,25 +25,21 @@
 
 #include "arrow/array.h"
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
-class ArrayBuilder;
 class Buffer;
 class MemoryPool;
 
 struct CharType : public DataType {
   int size;
 
-  BytesType physical_type;
-
-  explicit CharType(int size, bool nullable = true)
-      : DataType(LogicalType::CHAR, nullable),
-        size(size),
-        physical_type(BytesType(size)) {}
+  explicit CharType(int size)
+      : DataType(Type::CHAR),
+        size(size) {}
 
   CharType(const CharType& other)
       : CharType(other.size) {}
@@ -56,54 +52,36 @@ struct CharType : public DataType {
 struct VarcharType : public DataType {
   int size;
 
-  BytesType physical_type;
-
-  explicit VarcharType(int size, bool nullable = true)
-      : DataType(LogicalType::VARCHAR, nullable),
-        size(size),
-        physical_type(BytesType(size + 1)) {}
+  explicit VarcharType(int size)
+      : DataType(Type::VARCHAR),
+        size(size) {}
   VarcharType(const VarcharType& other)
       : VarcharType(other.size) {}
 
   virtual std::string ToString() const;
 };
 
-static const LayoutPtr byte1(new BytesType(1));
-static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
-
 // TODO: add a BinaryArray layer in between
 class StringArray : public ListArray {
  public:
-  StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {}
-
-  StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    Init(length, offsets, values, null_count, nulls);
-  }
-
-  void Init(const TypePtr& type, int32_t length,
+  StringArray(const TypePtr& type, int32_t length,
       const std::shared_ptr<Buffer>& offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    ListArray::Init(type, length, offsets, values, null_count, nulls);
-
-    // TODO: type validation for values array
-
+      const std::shared_ptr<Buffer>& nulls = nullptr) :
+      ListArray(type, length, offsets, values, null_count, nulls) {
     // For convenience
     bytes_ = static_cast<UInt8Array*>(values.get());
     raw_bytes_ = bytes_->raw_data();
   }
 
-  void Init(int32_t length, const std::shared_ptr<Buffer>& offsets,
+  StringArray(int32_t length,
+      const std::shared_ptr<Buffer>& offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    TypePtr type(new StringType());
-    Init(type, length, offsets, values, null_count, nulls);
-  }
+      const std::shared_ptr<Buffer>& nulls = nullptr) :
+      StringArray(std::make_shared<StringType>(), length, offsets, values,
+          null_count, nulls) {}
 
   // Compute the pointer t
   const uint8_t* GetValue(int i, int32_t* out_length) const {
@@ -125,9 +103,6 @@ class StringArray : public ListArray {
 };
 
 // Array builder
-
-
-
 class StringBuilder : public ListBuilder {
  public:
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 9a4777e..d94396f 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -15,16 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
-#include "arrow/types/string.h"
-#include "arrow/types/struct.h"
 
 using std::shared_ptr;
 using std::string;
@@ -42,13 +39,13 @@ TEST(TestStructType, Basics) {
   TypePtr f2_type = TypePtr(new UInt8Type());
   auto f2 = std::make_shared<Field>("f2", f2_type);
 
-  vector<shared_ptr<Field> > fields = {f0, f1, f2};
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
 
   StructType struct_type(fields);
 
-  ASSERT_TRUE(struct_type.field(0)->Equals(f0));
-  ASSERT_TRUE(struct_type.field(1)->Equals(f1));
-  ASSERT_TRUE(struct_type.field(2)->Equals(f2));
+  ASSERT_TRUE(struct_type.child(0)->Equals(f0));
+  ASSERT_TRUE(struct_type.child(1)->Equals(f1));
+  ASSERT_TRUE(struct_type.child(2)->Equals(f2));
 
   ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 1744efc..227aca6 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -18,11 +18,12 @@
 #ifndef ARROW_TYPES_TEST_COMMON_H
 #define ARROW_TYPES_TEST_COMMON_H
 
-#include <gtest/gtest.h>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/memory-pool.h"
@@ -34,7 +35,7 @@ namespace arrow {
 class TestBuilder : public ::testing::Test {
  public:
   void SetUp() {
-    pool_ = GetDefaultMemoryPool();
+    pool_ = default_memory_pool();
     type_ = TypePtr(new UInt8Type());
     builder_.reset(new UInt8Builder(pool_, type_));
     builder_nn_.reset(new UInt8Builder(pool_, type_));

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index 9aff780..29cda90 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -30,8 +30,8 @@ namespace arrow {
 
 class Buffer;
 
-struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
-  typedef CollectionType<LogicalType::DENSE_UNION> Base;
+struct DenseUnionType : public CollectionType<Type::DENSE_UNION> {
+  typedef CollectionType<Type::DENSE_UNION> Base;
 
   explicit DenseUnionType(const std::vector<TypePtr>& child_types) :
       Base() {
@@ -42,8 +42,8 @@ struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
 };
 
 
-struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
-  typedef CollectionType<LogicalType::SPARSE_UNION> Base;
+struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> {
+  typedef CollectionType<Type::SPARSE_UNION> Base;
 
   explicit SparseUnionType(const std::vector<TypePtr>& child_types) :
       Base() {
@@ -55,28 +55,20 @@ struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
 
 
 class UnionArray : public Array {
- public:
-  UnionArray() : Array() {}
-
  protected:
   // The data are types encoded as int16
   Buffer* types_;
-  std::vector<std::shared_ptr<Array> > children_;
+  std::vector<std::shared_ptr<Array>> children_;
 };
 
 
 class DenseUnionArray : public UnionArray {
- public:
-  DenseUnionArray() : UnionArray() {}
-
  protected:
   Buffer* offset_buf_;
 };
 
 
 class SparseUnionArray : public UnionArray {
- public:
-  SparseUnionArray() : UnionArray() {}
 };
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 7506ca5..220bff0 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include "arrow/util/bit-util.h"
 
+#include "gtest/gtest.h"
+
 namespace arrow {
 
 TEST(UtilTests, TestNextPower2) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5e7197f..1d2d1d5 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -19,7 +19,6 @@
 #define ARROW_UTIL_BIT_UTIL_H
 
 #include <cstdint>
-#include <cstdlib>
 #include <memory>
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index 9f1fd91..1d58226 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -15,11 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
 #include <limits>
 #include <string>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/status.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 50f4716..04cdcd7 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -40,7 +40,7 @@ std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
 PoolBuffer::PoolBuffer(MemoryPool* pool) :
     ResizableBuffer(nullptr, 0) {
   if (pool == nullptr) {
-    pool = GetDefaultMemoryPool();
+    pool = default_memory_pool();
   }
   pool_ = pool;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
index 954b5f9..6ef07a0 100644
--- a/cpp/src/arrow/util/memory-pool-test.cc
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -15,10 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
 #include <limits>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/util/memory-pool.h"
 #include "arrow/util/status.h"
@@ -26,7 +27,7 @@
 namespace arrow {
 
 TEST(DefaultMemoryPool, MemoryTracking) {
-  MemoryPool* pool = GetDefaultMemoryPool();
+  MemoryPool* pool = default_memory_pool();
 
   uint8_t* data;
   ASSERT_OK(pool->Allocate(100, &data));
@@ -37,7 +38,7 @@ TEST(DefaultMemoryPool, MemoryTracking) {
 }
 
 TEST(DefaultMemoryPool, OOM) {
-  MemoryPool* pool = GetDefaultMemoryPool();
+  MemoryPool* pool = default_memory_pool();
 
   uint8_t* data;
   int64_t to_alloc = std::numeric_limits<int64_t>::max();

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc
index 5820346..0b885e9 100644
--- a/cpp/src/arrow/util/memory-pool.cc
+++ b/cpp/src/arrow/util/memory-pool.cc
@@ -70,9 +70,9 @@ void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) {
 
 InternalMemoryPool::~InternalMemoryPool() {}
 
-MemoryPool* GetDefaultMemoryPool() {
-  static InternalMemoryPool default_memory_pool;
-  return &default_memory_pool;
+MemoryPool* default_memory_pool() {
+  static InternalMemoryPool default_memory_pool_;
+  return &default_memory_pool_;
 }
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h
index a7cb10d..0d24786 100644
--- a/cpp/src/arrow/util/memory-pool.h
+++ b/cpp/src/arrow/util/memory-pool.h
@@ -34,7 +34,7 @@ class MemoryPool {
   virtual int64_t bytes_allocated() const = 0;
 };
 
-MemoryPool* GetDefaultMemoryPool();
+MemoryPool* default_memory_pool();
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index c6e113e..43cb87e 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -54,6 +54,9 @@ std::string Status::CodeAsString() const {
     case StatusCode::Invalid:
       type = "Invalid";
       break;
+    case StatusCode::IOError:
+      type = "IOError";
+      break;
     case StatusCode::NotImplemented:
       type = "NotImplemented";
       break;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h
index 47fda40..b593123 100644
--- a/cpp/src/arrow/util/status.h
+++ b/cpp/src/arrow/util/status.h
@@ -63,6 +63,7 @@ enum class StatusCode: char {
   OutOfMemory = 1,
   KeyError = 2,
   Invalid = 3,
+  IOError = 4,
 
   NotImplemented = 10,
 };
@@ -97,12 +98,17 @@ class Status {
     return Status(StatusCode::Invalid, msg, -1);
   }
 
+  static Status IOError(const std::string& msg) {
+    return Status(StatusCode::IOError, msg, -1);
+  }
+
   // Returns true iff the status indicates success.
   bool ok() const { return (state_ == NULL); }
 
   bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
+  bool IsIOError() const { return code() == StatusCode::IOError; }
 
   // Return a string representation of this status suitable for printing.
   // Returns the string "OK" for success.

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/test_main.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/test_main.cc b/cpp/src/arrow/util/test_main.cc
index 00139f3..adc8466 100644
--- a/cpp/src/arrow/util/test_main.cc
+++ b/cpp/src/arrow/util/test_main.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
+#include "gtest/gtest.h"
 
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/build_thirdparty.sh b/cpp/thirdparty/build_thirdparty.sh
index 294737c..3d5f532 100755
--- a/cpp/thirdparty/build_thirdparty.sh
+++ b/cpp/thirdparty/build_thirdparty.sh
@@ -17,6 +17,7 @@ else
     case $arg in
       "gtest")      F_GTEST=1 ;;
       "gbenchmark")      F_GBENCHMARK=1 ;;
+      "flatbuffers")      F_FLATBUFFERS=1 ;;
       *)            echo "Unknown module: $arg"; exit 1 ;;
     esac
   done
@@ -78,6 +79,14 @@ if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then
   make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; }
 fi
 
+FLATBUFFERS_ERROR="failed for flatbuffers"
+if [ -n "$F_ALL" -o -n "$F_FLATBUFFERS" ]; then
+  cd $TP_DIR/$FLATBUFFERS_BASEDIR
+
+  CXXFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX -DFLATBUFFERS_BUILD_TESTS=OFF . || { echo "cmake $FLATBUFFERS_ERROR" ; exit 1; }
+  make -j$PARALLEL
+  make install
+fi
 
 echo "---------------------"
 echo "Thirdparty dependencies built and installed into $PREFIX successfully"

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/download_thirdparty.sh b/cpp/thirdparty/download_thirdparty.sh
index d22c559..d299afc 100755
--- a/cpp/thirdparty/download_thirdparty.sh
+++ b/cpp/thirdparty/download_thirdparty.sh
@@ -25,3 +25,8 @@ if [ ! -d ${GBENCHMARK_BASEDIR} ]; then
   echo "Fetching google benchmark"
   download_extract_and_cleanup $GBENCHMARK_URL
 fi
+
+if [ ! -d ${FLATBUFFERS_BASEDIR} ]; then
+  echo "Fetching flatbuffers"
+  download_extract_and_cleanup $FLATBUFFERS_URL
+fi

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/versions.sh b/cpp/thirdparty/versions.sh
index 9cfc7cd..cb455b4 100755
--- a/cpp/thirdparty/versions.sh
+++ b/cpp/thirdparty/versions.sh
@@ -5,3 +5,7 @@ GTEST_BASEDIR=googletest-release-$GTEST_VERSION
 GBENCHMARK_VERSION=1.0.0
 GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
 GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION
+
+FLATBUFFERS_VERSION=1.3.0
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
+FLATBUFFERS_BASEDIR=flatbuffers-$FLATBUFFERS_VERSION

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
new file mode 100644
index 0000000..3ffd203
--- /dev/null
+++ b/format/Message.fbs
@@ -0,0 +1,183 @@
+namespace apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// Logical types and their metadata (if any)
+///
+/// These are stored in the flatbuffer in the Type union below
+
+/// A Tuple in the flatbuffer metadata is the same as an Arrow Struct
+/// (according to the physical memory layout). We used Tuple here as Struct is
+/// a reserved word in Flatbuffers
+table Tuple {
+}
+
+table List {
+}
+
+enum UnionMode:int { Sparse, Dense }
+
+table Union {
+  mode: UnionMode;
+}
+
+table Bit {
+}
+
+table Int {
+  bitWidth: int; // 1 to 64
+  is_signed: bool;
+}
+
+enum Precision:int {SINGLE, DOUBLE}
+
+table FloatingPoint {
+  precision: Precision;
+}
+
+table Utf8 {
+}
+
+table Binary {
+}
+
+table Bool {
+}
+
+table Decimal {
+  precision: int;
+  scale: int;
+}
+
+table Timestamp {
+  timezone: string;
+}
+
+table JSONScalar {
+  dense:bool=true;
+}
+
+/// ----------------------------------------------------------------------
+/// Top-level Type value, enabling extensible type-specific metadata. We can
+/// add new logical types to Type without breaking backwards compatibility
+
+union Type {
+  Int,
+  Bit,
+  FloatingPoint,
+  Binary,
+  Utf8,
+  Bool,
+  Decimal,
+  Timestamp,
+  List,
+  Tuple,
+  Union,
+  JSONScalar
+}
+
+/// ----------------------------------------------------------------------
+/// A field represents a named column in a record / row batch or child of a
+/// nested type.
+///
+/// - children is only for nested Arrow arrays
+/// - For primitive types, children will have length 0
+/// - nullable should default to true in general
+
+table Field {
+  // Name is not required, e.g. in a List
+  name: string;
+  nullable: bool;
+  type: Type;
+  children: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// A Schema describes the columns in a row batch
+
+table Schema {
+  fields: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// Data structures for describing a table row batch (a collection of
+/// equal-length Arrow arrays)
+
+/// A Buffer represents a single contiguous memory segment
+struct Buffer {
+  /// The shared memory page id where this buffer is located. Currently this is
+  /// not used
+  page: int;
+
+  /// The relative offset into the shared memory page where the bytes for this
+  /// buffer start
+  offset: long;
+
+  /// The absolute length (in bytes) of the memory buffer. The memory is found
+  /// from offset (inclusive) to offset + length (non-inclusive).
+  length: long;
+}
+
+/// Metadata about a field at some level of a nested type tree (but not
+/// its children).
+///
+/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+/// null_count: 0} for its Int16 node, as separate FieldNode structs
+struct FieldNode {
+  /// The number of value slots in the Arrow array at this level of a nested
+  /// tree
+  length: int;
+
+  /// The number of observed nulls. Fields with null_count == 0 may choose not
+  /// to write their physical null bitmap out as a materialized buffer, instead
+  /// setting the length of the null buffer to 0.
+  null_count: int;
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+table RecordBatch {
+  /// number of records / rows. The arrays in the batch should all have this
+  /// length
+  length: int;
+
+  /// Nodes correspond to the pre-ordered flattened logical schema
+  nodes: [FieldNode];
+
+  /// Buffers correspond to the pre-ordered flattened buffer tree
+  ///
+  /// The number of buffers appended to this list depends on the schema. For
+  /// example, most primitive arrays will have 2 buffers, 1 for the null bitmap
+  /// and 1 for the values. For struct arrays, there will only be a single
+  /// buffer for the null bitmap
+  buffers: [Buffer];
+}
+
+/// ----------------------------------------------------------------------
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
+///
+/// TODO(wesm): To be documented in more detail
+
+table DictionaryBatch {
+  id: long;
+  data: RecordBatch;
+}
+
+/// ----------------------------------------------------------------------
+/// The root Message type
+
+/// This union enables us to easily send different message types without
+/// redundant storage, and in the future we can easily add new message types.
+union MessageHeader {
+  Schema, DictionaryBatch, RecordBatch
+}
+
+table Message {
+  header: MessageHeader;
+  bodyLength: long;
+}
+
+root_type Message;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8d93a15..9a08070 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -35,4 +35,6 @@ from pyarrow.schema import (null, bool_,
                             uint8, uint16, uint32, uint64,
                             float_, double, string,
                             list_, struct, field,
-                            DataType, Field, Schema)
+                            DataType, Field, Schema, schema)
+
+from pyarrow.array import RowBatch

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index d0d3486..de3c774 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -16,7 +16,7 @@
 # under the License.
 
 from pyarrow.includes.common cimport shared_ptr
-from pyarrow.includes.libarrow cimport CArray, LogicalType
+from pyarrow.includes.libarrow cimport CArray
 
 from pyarrow.scalar import NA
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index bceb333..c5d40dd 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -28,6 +28,9 @@ from pyarrow.error cimport check_status
 cimport pyarrow.scalar as scalar
 from pyarrow.scalar import NA
 
+from pyarrow.schema cimport Schema
+import pyarrow.schema as schema
+
 def total_allocated_bytes():
     cdef MemoryPool* pool = pyarrow.GetMemoryPool()
     return pool.bytes_allocated()
@@ -155,12 +158,12 @@ cdef class StringArray(Array):
 
 
 cdef dict _array_classes = {
-    LogicalType_NA: NullArray,
-    LogicalType_BOOL: BooleanArray,
-    LogicalType_INT64: Int64Array,
-    LogicalType_DOUBLE: DoubleArray,
-    LogicalType_LIST: ListArray,
-    LogicalType_STRING: StringArray,
+    Type_NA: NullArray,
+    Type_BOOL: BooleanArray,
+    Type_INT64: Int64Array,
+    Type_DOUBLE: DoubleArray,
+    Type_LIST: ListArray,
+    Type_STRING: StringArray,
 }
 
 cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
@@ -190,3 +193,35 @@ def from_pylist(object list_obj, DataType type=None):
         raise NotImplementedError
 
     return box_arrow_array(sp_array)
+
+#----------------------------------------------------------------------
+# Table-like data structures
+
+cdef class RowBatch:
+    """
+    Row batch: a schema, a row count, and a list of arrays (one per schema field).
+    """
+    cdef readonly:
+        Schema schema
+        int num_rows
+        list arrays
+
+    def __cinit__(self, Schema schema, int num_rows, list arrays):
+        self.schema = schema
+        self.num_rows = num_rows
+        self.arrays = arrays
+
+        if len(self.schema) != len(arrays):
+            raise ValueError('Mismatch number of data arrays and '
+                             'schema fields')
+
+    def __len__(self):
+        return self.num_rows
+
+    property num_columns:
+
+        def __get__(self):
+            return len(self.arrays)
+
+    def __getitem__(self, i):
+        return self.arrays[i]

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index baba112..e6afcbd 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -21,31 +21,30 @@ from pyarrow.includes.common cimport *
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
-    enum LogicalType" arrow::LogicalType::type":
-        LogicalType_NA" arrow::LogicalType::NA"
+    enum Type" arrow::Type::type":
+        Type_NA" arrow::Type::NA"
 
-        LogicalType_BOOL" arrow::LogicalType::BOOL"
+        Type_BOOL" arrow::Type::BOOL"
 
-        LogicalType_UINT8" arrow::LogicalType::UINT8"
-        LogicalType_INT8" arrow::LogicalType::INT8"
-        LogicalType_UINT16" arrow::LogicalType::UINT16"
-        LogicalType_INT16" arrow::LogicalType::INT16"
-        LogicalType_UINT32" arrow::LogicalType::UINT32"
-        LogicalType_INT32" arrow::LogicalType::INT32"
-        LogicalType_UINT64" arrow::LogicalType::UINT64"
-        LogicalType_INT64" arrow::LogicalType::INT64"
+        Type_UINT8" arrow::Type::UINT8"
+        Type_INT8" arrow::Type::INT8"
+        Type_UINT16" arrow::Type::UINT16"
+        Type_INT16" arrow::Type::INT16"
+        Type_UINT32" arrow::Type::UINT32"
+        Type_INT32" arrow::Type::INT32"
+        Type_UINT64" arrow::Type::UINT64"
+        Type_INT64" arrow::Type::INT64"
 
-        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
-        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+        Type_FLOAT" arrow::Type::FLOAT"
+        Type_DOUBLE" arrow::Type::DOUBLE"
 
-        LogicalType_STRING" arrow::LogicalType::STRING"
+        Type_STRING" arrow::Type::STRING"
 
-        LogicalType_LIST" arrow::LogicalType::LIST"
-        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+        Type_LIST" arrow::Type::LIST"
+        Type_STRUCT" arrow::Type::STRUCT"
 
     cdef cppclass CDataType" arrow::DataType":
-        LogicalType type
-        c_bool nullable
+        Type type
 
         c_bool Equals(const CDataType* other)
 
@@ -55,8 +54,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         int64_t bytes_allocated()
 
     cdef cppclass CListType" arrow::ListType"(CDataType):
-        CListType(const shared_ptr[CDataType]& value_type,
-                  c_bool nullable)
+        CListType(const shared_ptr[CDataType]& value_type)
 
     cdef cppclass CStringType" arrow::StringType"(CDataType):
         pass
@@ -65,21 +63,26 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_string name
         shared_ptr[CDataType] type
 
-        CField(const c_string& name, const shared_ptr[CDataType]& type)
+        c_bool nullable
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type,
+               c_bool nullable)
 
     cdef cppclass CStructType" arrow::StructType"(CDataType):
-        CStructType(const vector[shared_ptr[CField]]& fields,
-                    c_bool nullable)
+        CStructType(const vector[shared_ptr[CField]]& fields)
 
     cdef cppclass CSchema" arrow::Schema":
-        CSchema(const shared_ptr[CField]& fields)
+        CSchema(const vector[shared_ptr[CField]]& fields)
+        const shared_ptr[CField]& field(int i)
+        int num_fields()
+        c_string ToString()
 
     cdef cppclass CArray" arrow::Array":
         const shared_ptr[CDataType]& type()
 
         int32_t length()
         int32_t null_count()
-        LogicalType logical_type()
+        Type type_enum()
 
         c_bool IsNull(int i)
 
@@ -122,3 +125,57 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CStringArray" arrow::StringArray"(CListArray):
         c_string GetString(int i)
+
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    # We can later add more of the common status factory methods as needed
+    cdef CStatus CStatus_OK "Status::OK"()
+
+    cdef cppclass CStatus "arrow::Status":
+        CStatus()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsNotImplemented()
+        c_bool IsInvalid()
+
+    cdef cppclass Buffer:
+        uint8_t* data()
+        int64_t size()
+
+
+cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil:
+    cdef cppclass SchemaMessage:
+        int num_fields()
+        CStatus GetField(int i, shared_ptr[CField]* out)
+        CStatus GetSchema(shared_ptr[CSchema]* out)
+
+    cdef cppclass FieldMetadata:
+        pass
+
+    cdef cppclass BufferMetadata:
+        pass
+
+    cdef cppclass RecordBatchMessage:
+        pass
+
+    cdef cppclass DictionaryBatchMessage:
+        pass
+
+    enum MessageType" arrow::ipc::Message::Type":
+        MessageType_SCHEMA" arrow::ipc::Message::SCHEMA"
+        MessageType_RECORD_BATCH" arrow::ipc::Message::RECORD_BATCH"
+        MessageType_DICTIONARY_BATCH" arrow::ipc::Message::DICTIONARY_BATCH"
+
+    cdef cppclass Message:
+        CStatus Open(const shared_ptr[Buffer]& buf,
+                     shared_ptr[Message]* out)
+        int64_t body_length()
+        MessageType type()
+
+        shared_ptr[SchemaMessage] GetSchema()
+        shared_ptr[RecordBatchMessage] GetRecordBatch()
+        shared_ptr[DictionaryBatchMessage] GetDictionaryBatch()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 9a0c004..eedfc85 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -18,8 +18,7 @@
 # distutils: language = c++
 
 from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
-                                        MemoryPool)
+from pyarrow.includes.libarrow cimport CArray, CDataType, Type, MemoryPool
 
 cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
     # We can later add more of the common status factory methods as needed
@@ -39,7 +38,7 @@ cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
         c_bool IsNotImplemented()
         c_bool IsArrowError()
 
-    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+    shared_ptr[CDataType] GetPrimitiveType(Type type)
     Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
 
     MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 261a389..04f013d 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -172,18 +172,18 @@ cdef class ListValue(ArrayValue):
 
 
 cdef dict _scalar_classes = {
-    LogicalType_UINT8: Int8Value,
-    LogicalType_UINT16: Int16Value,
-    LogicalType_UINT32: Int32Value,
-    LogicalType_UINT64: Int64Value,
-    LogicalType_INT8: Int8Value,
-    LogicalType_INT16: Int16Value,
-    LogicalType_INT32: Int32Value,
-    LogicalType_INT64: Int64Value,
-    LogicalType_FLOAT: FloatValue,
-    LogicalType_DOUBLE: DoubleValue,
-    LogicalType_LIST: ListValue,
-    LogicalType_STRING: StringValue
+    Type_UINT8: Int8Value,
+    Type_UINT16: Int16Value,
+    Type_UINT32: Int32Value,
+    Type_UINT64: Int64Value,
+    Type_INT8: Int8Value,
+    Type_INT16: Int16Value,
+    Type_INT32: Int32Value,
+    Type_INT64: Int64Value,
+    Type_FLOAT: FloatValue,
+    Type_DOUBLE: DoubleValue,
+    Type_LIST: ListValue,
+    Type_STRING: StringValue
 }
 
 cdef object box_arrow_scalar(DataType type,

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
index 07b9bd0..61458b7 100644
--- a/python/pyarrow/schema.pxd
+++ b/python/pyarrow/schema.pxd
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
 
 cdef class DataType:
@@ -33,9 +33,13 @@ cdef class Field:
     cdef readonly:
         DataType type
 
+    cdef init(self, const shared_ptr[CField]& field)
+
 cdef class Schema:
     cdef:
         shared_ptr[CSchema] sp_schema
         CSchema* schema
 
+    cdef init(self, const vector[shared_ptr[CField]]& fields)
+
 cdef DataType box_data_type(const shared_ptr[CDataType]& type)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index ea87872..b3bf02a 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -54,94 +54,153 @@ cdef class DataType:
 
 cdef class Field:
 
-    def __cinit__(self, object name, DataType type):
-        self.type = type
-        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
-        self.field = self.sp_field.get()
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CField]& field):
+        self.sp_field = field
+        self.field = field.get()
+
+    @classmethod
+    def from_py(cls, object name, DataType type, bint nullable=True):
+        cdef Field result = Field()
+        result.type = type
+        result.sp_field.reset(new CField(tobytes(name), type.sp_type,
+                                         nullable))
+        result.field = result.sp_field.get()
+
+        return result
 
     def __repr__(self):
         return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
 
+    property nullable:
+
+        def __get__(self):
+            return self.field.nullable
+
     property name:
 
         def __get__(self):
             return frombytes(self.field.name)
 
+cdef class Schema:
+
+    def __cinit__(self):
+        pass
+
+    def __len__(self):
+        return self.schema.num_fields()
+
+    def __getitem__(self, i):
+        if i < 0 or i >= len(self):
+            raise IndexError("{0} is out of bounds".format(i))
+
+        cdef Field result = Field()
+        result.init(self.schema.field(i))
+        result.type = box_data_type(result.field.type)
+
+        return result
+
+    cdef init(self, const vector[shared_ptr[CField]]& fields):
+        self.schema = new CSchema(fields)
+        self.sp_schema.reset(self.schema)
+
+    @classmethod
+    def from_fields(cls, fields):
+        cdef:
+            Schema result
+            Field field
+            vector[shared_ptr[CField]] c_fields
+
+        c_fields.resize(len(fields))
+
+        for i in range(len(fields)):
+            field = fields[i]
+            c_fields[i] = field.sp_field
+
+        result = Schema()
+        result.init(c_fields)
+
+        return result
+
+    def __repr__(self):
+        return frombytes(self.schema.ToString())
+
 cdef dict _type_cache = {}
 
-cdef DataType primitive_type(LogicalType type, bint nullable=True):
-    if (type, nullable) in _type_cache:
-        return _type_cache[type, nullable]
+cdef DataType primitive_type(Type type):
+    if type in _type_cache:
+        return _type_cache[type]
 
     cdef DataType out = DataType()
-    out.init(pyarrow.GetPrimitiveType(type, nullable))
+    out.init(pyarrow.GetPrimitiveType(type))
 
-    _type_cache[type, nullable] = out
+    _type_cache[type] = out
     return out
 
 #------------------------------------------------------------
 # Type factory functions
 
-def field(name, type):
-    return Field(name, type)
+def field(name, type, bint nullable=True):
+    return Field.from_py(name, type, nullable)
 
 cdef set PRIMITIVE_TYPES = set([
-    LogicalType_NA, LogicalType_BOOL,
-    LogicalType_UINT8, LogicalType_INT8,
-    LogicalType_UINT16, LogicalType_INT16,
-    LogicalType_UINT32, LogicalType_INT32,
-    LogicalType_UINT64, LogicalType_INT64,
-    LogicalType_FLOAT, LogicalType_DOUBLE])
+    Type_NA, Type_BOOL,
+    Type_UINT8, Type_INT8,
+    Type_UINT16, Type_INT16,
+    Type_UINT32, Type_INT32,
+    Type_UINT64, Type_INT64,
+    Type_FLOAT, Type_DOUBLE])
 
 def null():
-    return primitive_type(LogicalType_NA)
+    return primitive_type(Type_NA)
 
-def bool_(c_bool nullable=True):
-    return primitive_type(LogicalType_BOOL, nullable)
+def bool_():
+    return primitive_type(Type_BOOL)
 
-def uint8(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT8, nullable)
+def uint8():
+    return primitive_type(Type_UINT8)
 
-def int8(c_bool nullable=True):
-    return primitive_type(LogicalType_INT8, nullable)
+def int8():
+    return primitive_type(Type_INT8)
 
-def uint16(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT16, nullable)
+def uint16():
+    return primitive_type(Type_UINT16)
 
-def int16(c_bool nullable=True):
-    return primitive_type(LogicalType_INT16, nullable)
+def int16():
+    return primitive_type(Type_INT16)
 
-def uint32(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT32, nullable)
+def uint32():
+    return primitive_type(Type_UINT32)
 
-def int32(c_bool nullable=True):
-    return primitive_type(LogicalType_INT32, nullable)
+def int32():
+    return primitive_type(Type_INT32)
 
-def uint64(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT64, nullable)
+def uint64():
+    return primitive_type(Type_UINT64)
 
-def int64(c_bool nullable=True):
-    return primitive_type(LogicalType_INT64, nullable)
+def int64():
+    return primitive_type(Type_INT64)
 
-def float_(c_bool nullable=True):
-    return primitive_type(LogicalType_FLOAT, nullable)
+def float_():
+    return primitive_type(Type_FLOAT)
 
-def double(c_bool nullable=True):
-    return primitive_type(LogicalType_DOUBLE, nullable)
+def double():
+    return primitive_type(Type_DOUBLE)
 
-def string(c_bool nullable=True):
+def string():
     """
     UTF8 string
     """
-    return primitive_type(LogicalType_STRING, nullable)
+    return primitive_type(Type_STRING)
 
-def list_(DataType value_type, c_bool nullable=True):
+def list_(DataType value_type):
     cdef DataType out = DataType()
-    out.init(shared_ptr[CDataType](
-        new CListType(value_type.sp_type, nullable)))
+    out.init(shared_ptr[CDataType](new CListType(value_type.sp_type)))
     return out
 
-def struct(fields, c_bool nullable=True):
+def struct(fields):
     """
 
     """
@@ -154,9 +213,11 @@ def struct(fields, c_bool nullable=True):
         c_fields.push_back(field.sp_field)
 
     out.init(shared_ptr[CDataType](
-        new CStructType(c_fields, nullable)))
+        new CStructType(c_fields)))
     return out
 
+def schema(fields):
+    return Schema.from_fields(fields)
 
 cdef DataType box_data_type(const shared_ptr[CDataType]& type):
     cdef DataType out = DataType()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 0235526..2894ea8 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -18,6 +18,8 @@
 from pyarrow.compat import unittest
 import pyarrow as arrow
 
+A = arrow
+
 
 class TestTypes(unittest.TestCase):
 
@@ -28,15 +30,12 @@ class TestTypes(unittest.TestCase):
         for name in dtypes:
             factory = getattr(arrow, name)
             t = factory()
-            t_required = factory(False)
-
             assert str(t) == name
-            assert str(t_required) == '{0} not null'.format(name)
 
     def test_list(self):
         value_type = arrow.int32()
         list_type = arrow.list_(value_type)
-        assert str(list_type) == 'list<int32>'
+        assert str(list_type) == 'list<item: int32>'
 
     def test_string(self):
         t = arrow.string()
@@ -47,5 +46,26 @@ class TestTypes(unittest.TestCase):
         f = arrow.field('foo', t)
 
         assert f.name == 'foo'
+        assert f.nullable
         assert f.type is t
         assert repr(f) == "Field('foo', type=string)"
+
+        f = arrow.field('foo', t, False)
+        assert not f.nullable
+
+    def test_schema(self):
+        fields = [
+            A.field('foo', A.int32()),
+            A.field('bar', A.string()),
+            A.field('baz', A.list_(A.int8()))
+        ]
+        sch = A.schema(fields)
+
+        assert len(sch) == 3
+        assert sch[0].name == 'foo'
+        assert sch[0].type == fields[0].type
+
+        assert repr(sch) == """\
+foo: int32
+bar: string
+baz: list<item: int8>"""

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
new file mode 100644
index 0000000..2e24445
--- /dev/null
+++ b/python/pyarrow/tests/test_table.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+A = arrow
+
+
+class TestRowBatch(unittest.TestCase):
+
+    def test_basics(self):
+        data = [
+            A.from_pylist(range(5)),
+            A.from_pylist([-10, -5, 0, 5, 10])
+        ]
+        num_rows = 5
+
+        descr = A.schema([A.field('c0', data[0].type),
+                          A.field('c1', data[1].type)])
+
+        batch = A.RowBatch(descr, num_rows, data)
+
+        assert len(batch) == num_rows
+        assert batch.num_rows == num_rows
+        assert batch.num_columns == len(data)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index bb79052..acb13ac 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -27,7 +27,7 @@
 
 using arrow::ArrayBuilder;
 using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
 
 namespace pyarrow {
 
@@ -356,17 +356,17 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
 // Dynamic constructor for sequence converters
 std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
   switch (type->type) {
-    case LogicalType::BOOL:
+    case Type::BOOL:
       return std::make_shared<BoolConverter>();
-    case LogicalType::INT64:
+    case Type::INT64:
       return std::make_shared<Int64Converter>();
-    case LogicalType::DOUBLE:
+    case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
-    case LogicalType::STRING:
+    case Type::STRING:
       return std::make_shared<StringConverter>();
-    case LogicalType::LIST:
+    case Type::LIST:
       return std::make_shared<ListConverter>();
-    case LogicalType::STRUCT:
+    case Type::STRUCT:
     default:
       return nullptr;
       break;
@@ -378,7 +378,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
   typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());
 
   value_converter_ = GetConverter(static_cast<arrow::ListType*>(
-          builder->type().get())->value_type);
+          builder->type().get())->value_type());
   if (value_converter_ == nullptr) {
     return Status::NotImplemented("value type not implemented");
   }
@@ -393,8 +393,8 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
   PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type));
 
   // Handle NA / NullType case
-  if (type->type == LogicalType::NA) {
-    out->reset(new arrow::Array(type, size, size));
+  if (type->type == Type::NA) {
+    out->reset(new arrow::NullArray(type, size));
     return Status::OK();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc
index 0921fc4..08003aa 100644
--- a/python/src/pyarrow/helpers.cc
+++ b/python/src/pyarrow/helpers.cc
@@ -37,19 +37,14 @@ const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
 const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
 const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
 
-#define GET_PRIMITIVE_TYPE(NAME, Type)          \
-  case LogicalType::NAME:                       \
-    if (nullable) {                             \
-      return NAME;                              \
-    } else {                                    \
-      return std::make_shared<Type>(nullable);  \
-    }                                           \
+#define GET_PRIMITIVE_TYPE(NAME, Class)         \
+  case Type::NAME:                              \
+    return NAME;                                \
     break;
 
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
-    bool nullable) {
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
   switch (type) {
-    case LogicalType::NA:
+    case Type::NA:
       return NA;
     GET_PRIMITIVE_TYPE(UINT8, UInt8Type);
     GET_PRIMITIVE_TYPE(INT8, Int8Type);

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h
index e41568d..ec42bb3 100644
--- a/python/src/pyarrow/helpers.h
+++ b/python/src/pyarrow/helpers.h
@@ -24,7 +24,7 @@
 namespace pyarrow {
 
 using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
 
 extern const std::shared_ptr<arrow::NullType> NA;
 extern const std::shared_ptr<arrow::BooleanType> BOOL;
@@ -40,8 +40,7 @@ extern const std::shared_ptr<arrow::FloatType> FLOAT;
 extern const std::shared_ptr<arrow::DoubleType> DOUBLE;
 extern const std::shared_ptr<arrow::StringType> STRING;
 
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
-    bool nullable);
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
 
 } // namespace pyarrow
 


Mime
View raw message