arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [2/2] arrow git commit: ARROW-261: Refactor String/Binary code paths to reflect unnested (non-list-based) structure
Date Mon, 17 Oct 2016 17:44:45 GMT
ARROW-261: Refactor String/Binary code paths to reflect unnested (non-list-based) structure

Per discussions on the mailing list. This should in theory match the Java implementation.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #176 from wesm/ARROW-261 and squashes the following commits:

dca39ce [Wes McKinney] Make binary/string constants static to avoid memory-access-related segfaults in third party libraries
1e65b01 [Wes McKinney] Deprecate pyarrow::Status in favor of just arrow::Status. Conform pyarrow use of ArrayBuilder::Finish
9a1f77e [Wes McKinney] Add license header to index.rst
bd70cab [Wes McKinney] Complete refactoring, fix up IPC tests for flattened string/binary buffer/metadata layout
ae64f2e [Wes McKinney] Refactoring to reflect collaprsed list-like structure of Binary and String types. Not yet complete


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/732a2059
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/732a2059
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/732a2059

Branch: refs/heads/master
Commit: 732a2059d0c4493e451c566160b9d5d01dfe87be
Parents: 8e8b17f
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Oct 17 13:44:34 2016 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Oct 17 13:44:34 2016 -0400

----------------------------------------------------------------------
 cpp/CMakeLists.txt                     |   1 -
 cpp/src/arrow/builder.h                |   2 +-
 cpp/src/arrow/ipc/adapter.cc           |  47 +++++----
 cpp/src/arrow/ipc/test-common.h        |  19 ++--
 cpp/src/arrow/type.h                   |  20 +---
 cpp/src/arrow/types/CMakeLists.txt     |   1 -
 cpp/src/arrow/types/binary.h           |  28 ------
 cpp/src/arrow/types/construct.cc       |  31 +-----
 cpp/src/arrow/types/construct.h        |   8 --
 cpp/src/arrow/types/json.cc            |  37 -------
 cpp/src/arrow/types/json.h             |  36 -------
 cpp/src/arrow/types/list-test.cc       |  15 ++-
 cpp/src/arrow/types/list.cc            |  42 +++++++-
 cpp/src/arrow/types/list.h             |  49 ++--------
 cpp/src/arrow/types/primitive-test.cc  |  26 +++--
 cpp/src/arrow/types/primitive.cc       |  31 ++++--
 cpp/src/arrow/types/primitive.h        |  31 +++---
 cpp/src/arrow/types/string-test.cc     |  33 +++----
 cpp/src/arrow/types/string.cc          | 101 +++++++++++++++----
 cpp/src/arrow/types/string.h           |  49 ++++++----
 cpp/src/arrow/types/struct-test.cc     |  21 ++--
 cpp/src/arrow/types/struct.cc          |  14 +++
 cpp/src/arrow/types/struct.h           |  17 +---
 cpp/src/arrow/util/status.cc           |   6 ++
 cpp/src/arrow/util/status.h            |  17 +++-
 python/CMakeLists.txt                  |   2 -
 python/doc/index.rst                   |  18 +++-
 python/pyarrow/error.pxd               |   4 +-
 python/pyarrow/error.pyx               |  10 +-
 python/pyarrow/includes/pyarrow.pxd    |  35 ++-----
 python/pyarrow/io.pyx                  |  56 +++++------
 python/pyarrow/ipc.pyx                 |  18 ++--
 python/pyarrow/parquet.pyx             |  14 +--
 python/src/pyarrow/adapters/builtin.cc |  39 ++++----
 python/src/pyarrow/adapters/builtin.h  |   9 +-
 python/src/pyarrow/adapters/pandas.cc  |  32 +++---
 python/src/pyarrow/adapters/pandas.h   |  15 ++-
 python/src/pyarrow/api.h               |   2 -
 python/src/pyarrow/common.cc           |  12 +--
 python/src/pyarrow/common.h            |   7 --
 python/src/pyarrow/io.cc               |  59 +++++------
 python/src/pyarrow/status.cc           |  92 ------------------
 python/src/pyarrow/status.h            | 146 ----------------------------
 43 files changed, 484 insertions(+), 768 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d682dc7..6f95483 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -681,7 +681,6 @@ set(ARROW_SRCS
 
   src/arrow/types/construct.cc
   src/arrow/types/decimal.cc
-  src/arrow/types/json.cc
   src/arrow/types/list.cc
   src/arrow/types/primitive.cc
   src/arrow/types/string.cc

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 646a6f2..cef17e5 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -93,7 +93,7 @@ class ARROW_EXPORT ArrayBuilder {
 
   // Creates new array object to hold the contents of the builder and transfers
   // ownership of the data.  This resets all variables on the builder.
-  virtual std::shared_ptr<Array> Finish() = 0;
+  virtual Status Finish(std::shared_ptr<Array>* out) = 0;
 
   const std::shared_ptr<DataType>& type() const { return type_; }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index cd8ab53..f84cb26 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -78,22 +78,6 @@ static bool IsPrimitive(const DataType* type) {
   }
 }
 
-static bool IsListType(const DataType* type) {
-  DCHECK(type != nullptr);
-  switch (type->type) {
-    // TODO(emkornfield) grouping like this are used in a few places in the
-    // code consider using pattern like:
-    // http://stackoverflow.com/questions/26784685/c-macro-for-calling-function-based-on-enum-type
-    //
-    case Type::BINARY:
-    case Type::LIST:
-    case Type::STRING:
-      return true;
-    default:
-      return false;
-  }
-}
-
 // ----------------------------------------------------------------------
 // Record batch write path
 
@@ -115,7 +99,11 @@ Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes
   if (IsPrimitive(arr_type)) {
     const auto prim_arr = static_cast<const PrimitiveArray*>(arr);
     buffers->push_back(prim_arr->data());
-  } else if (IsListType(arr_type)) {
+  } else if (arr->type_enum() == Type::STRING || arr->type_enum() == Type::BINARY) {
+    const auto binary_arr = static_cast<const BinaryArray*>(arr);
+    buffers->push_back(binary_arr->offsets());
+    buffers->push_back(binary_arr->data());
+  } else if (arr->type_enum() == Type::LIST) {
     const auto list_arr = static_cast<const ListArray*>(arr);
     buffers->push_back(list_arr->offset_buffer());
     RETURN_NOT_OK(VisitArray(
@@ -331,9 +319,21 @@ class RecordBatchReader::RecordBatchReaderImpl {
       }
       return MakePrimitiveArray(
           type, field_meta.length, data, field_meta.null_count, null_bitmap, out);
-    }
+    } else if (type->type == Type::STRING || type->type == Type::BINARY) {
+      std::shared_ptr<Buffer> offsets;
+      std::shared_ptr<Buffer> values;
+      RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets));
+      RETURN_NOT_OK(GetBuffer(buffer_index_++, &values));
 
-    if (IsListType(type.get())) {
+      if (type->type == Type::STRING) {
+        *out = std::make_shared<StringArray>(
+            field_meta.length, offsets, values, field_meta.null_count, null_bitmap);
+      } else {
+        *out = std::make_shared<BinaryArray>(
+            field_meta.length, offsets, values, field_meta.null_count, null_bitmap);
+      }
+      return Status::OK();
+    } else if (type->type == Type::LIST) {
       std::shared_ptr<Buffer> offsets;
       RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets));
       const int num_children = type->num_children();
@@ -346,11 +346,10 @@ class RecordBatchReader::RecordBatchReaderImpl {
       std::shared_ptr<Array> values_array;
       RETURN_NOT_OK(
           NextArray(type->child(0).get(), max_recursion_depth - 1, &values_array));
-      return MakeListArray(type, field_meta.length, offsets, values_array,
-          field_meta.null_count, null_bitmap, out);
-    }
-
-    if (type->type == Type::STRUCT) {
+      *out = std::make_shared<ListArray>(type, field_meta.length, offsets, values_array,
+          field_meta.null_count, null_bitmap);
+      return Status::OK();
+    } else if (type->type == Type::STRUCT) {
       const int num_children = type->num_children();
       std::vector<ArrayPtr> fields;
       fields.reserve(num_children);

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 7d02bc3..13bbbeb 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -42,7 +42,7 @@ const auto kListInt32 = std::make_shared<ListType>(kInt32);
 const auto kListListInt32 = std::make_shared<ListType>(kListInt32);
 
 Status MakeRandomInt32Array(
-    int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* array) {
+    int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) {
   std::shared_ptr<PoolBuffer> data;
   test::MakeRandomInt32PoolBuffer(length, pool, &data);
   const auto kInt32 = std::make_shared<Int32Type>();
@@ -52,16 +52,14 @@ Status MakeRandomInt32Array(
     test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes);
     RETURN_NOT_OK(builder.Append(
         reinterpret_cast<const int32_t*>(data->data()), length, valid_bytes->data()));
-    *array = builder.Finish();
-    return Status::OK();
+    return builder.Finish(out);
   }
   RETURN_NOT_OK(builder.Append(reinterpret_cast<const int32_t*>(data->data()), length));
-  *array = builder.Finish();
-  return Status::OK();
+  return builder.Finish(out);
 }
 
 Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
-    bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* array) {
+    bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) {
   // Create the null list values
   std::vector<uint8_t> valid_lists(num_lists);
   const double null_percent = include_nulls ? 0.1 : 0;
@@ -90,8 +88,8 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li
   }
   ListBuilder builder(pool, child_array);
   RETURN_NOT_OK(builder.Append(offsets.data(), num_lists, valid_lists.data()));
-  *array = builder.Finish();
-  return (*array)->Validate();
+  RETURN_NOT_OK(builder.Finish(out));
+  return (*out)->Validate();
 }
 
 typedef Status MakeRecordBatch(std::shared_ptr<RecordBatch>* out);
@@ -115,7 +113,7 @@ Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) {
 
 template <class Builder, class RawType>
 Status MakeRandomBinaryArray(
-    const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* array) {
+    const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* out) {
   const std::vector<std::string> values = {
       "", "", "abc", "123", "efg", "456!@#!@#", "12312"};
   Builder builder(pool, type);
@@ -130,8 +128,7 @@ Status MakeRandomBinaryArray(
           builder.Append(reinterpret_cast<const RawType*>(value.data()), value.size()));
     }
   }
-  *array = builder.Finish();
-  return Status::OK();
+  return builder.Finish(out);
 }
 
 Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index b4c3721..ea8516f 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -242,7 +242,7 @@ struct ARROW_EXPORT DoubleType : public PrimitiveType<DoubleType> {
 struct ARROW_EXPORT ListType : public DataType {
   // List can contain any other logical value type
   explicit ListType(const std::shared_ptr<DataType>& value_type)
-      : ListType(value_type, Type::LIST) {}
+      : ListType(std::make_shared<Field>("item", value_type)) {}
 
   explicit ListType(const std::shared_ptr<Field>& value_field) : DataType(Type::LIST) {
     children_ = {value_field};
@@ -255,26 +255,17 @@ struct ARROW_EXPORT ListType : public DataType {
   static char const* name() { return "list"; }
 
   std::string ToString() const override;
-
- protected:
-  // Constructor for classes that are implemented as List Arrays.
-  ListType(const std::shared_ptr<DataType>& value_type, Type::type logical_type)
-      : DataType(logical_type) {
-    // TODO ARROW-187 this can technically fail, make a constructor method ?
-    children_ = {std::make_shared<Field>("item", value_type)};
-  }
 };
 
 // BinaryType type is reprsents lists of 1-byte values.
-struct ARROW_EXPORT BinaryType : public ListType {
+struct ARROW_EXPORT BinaryType : public DataType {
   BinaryType() : BinaryType(Type::BINARY) {}
   static char const* name() { return "binary"; }
   std::string ToString() const override;
 
  protected:
   // Allow subclasses to change the logical type.
-  explicit BinaryType(Type::type logical_type)
-      : ListType(std::shared_ptr<DataType>(new UInt8Type()), logical_type) {}
+  explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
 };
 
 // UTF encoded strings
@@ -284,9 +275,6 @@ struct ARROW_EXPORT StringType : public BinaryType {
   static char const* name() { return "string"; }
 
   std::string ToString() const override;
-
- protected:
-  explicit StringType(Type::type logical_type) : BinaryType(logical_type) {}
 };
 
 struct ARROW_EXPORT StructType : public DataType {
@@ -300,7 +288,7 @@ struct ARROW_EXPORT StructType : public DataType {
 
 // These will be defined elsewhere
 template <typename T>
-struct type_traits {};
+struct TypeTraits {};
 
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 72a8e77..9f78169 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -25,7 +25,6 @@ install(FILES
   construct.h
   datetime.h
   decimal.h
-  json.h
   list.h
   primitive.h
   string.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/binary.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/binary.h b/cpp/src/arrow/types/binary.h
deleted file mode 100644
index 201fbb6..0000000
--- a/cpp/src/arrow/types/binary.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_BINARY_H
-#define ARROW_TYPES_BINARY_H
-
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {}  // namespace arrow
-
-#endif  // ARROW_TYPES_BINARY_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 0b71ea9..67245f8 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -59,6 +59,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     BUILDER_CASE(DOUBLE, DoubleBuilder);
 
     BUILDER_CASE(STRING, StringBuilder);
+    BUILDER_CASE(BINARY, BinaryBuilder);
 
     case Type::LIST: {
       std::shared_ptr<ArrayBuilder> value_builder;
@@ -105,10 +106,10 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length,
     MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
     MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
     MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
-    MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
-    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
     MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
     MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
     MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray);
     default:
       return Status::NotImplemented(type->ToString());
@@ -120,30 +121,4 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length,
 #endif
 }
 
-Status MakeListArray(const TypePtr& type, int32_t length,
-    const std::shared_ptr<Buffer>& offsets, const ArrayPtr& values, int32_t null_count,
-    const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
-  switch (type->type) {
-    case Type::BINARY:
-      out->reset(new BinaryArray(type, length, offsets, values, null_count, null_bitmap));
-      break;
-
-    case Type::LIST:
-      out->reset(new ListArray(type, length, offsets, values, null_count, null_bitmap));
-      break;
-
-    case Type::DECIMAL_TEXT:
-    case Type::STRING:
-      out->reset(new StringArray(type, length, offsets, values, null_count, null_bitmap));
-      break;
-    default:
-      return Status::NotImplemented(type->ToString());
-  }
-#ifdef NDEBUG
-  return Status::OK();
-#else
-  return (*out)->Validate();
-#endif
-}
-
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index afdadbe..e18e946 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -42,14 +42,6 @@ Status ARROW_EXPORT MakePrimitiveArray(const std::shared_ptr<DataType>& type,
     int32_t length, const std::shared_ptr<Buffer>& data, int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap, std::shared_ptr<Array>* out);
 
-// Create new list arrays for logical types that are backed by ListArrays (e.g. list of
-// primitives and strings)
-// TODO(emkornfield) split up string vs list?
-Status ARROW_EXPORT MakeListArray(const std::shared_ptr<DataType>& type, int32_t length,
-    const std::shared_ptr<Buffer>& offests, const std::shared_ptr<Array>& values,
-    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap,
-    std::shared_ptr<Array>* out);
-
 }  // namespace arrow
 
 #endif  // ARROW_BUILDER_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
deleted file mode 100644
index 89240fc..0000000
--- a/cpp/src/arrow/types/json.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/json.h"
-
-#include <vector>
-
-#include "arrow/type.h"
-#include "arrow/types/union.h"
-
-namespace arrow {
-
-static const TypePtr Null(new NullType());
-static const TypePtr Int32(new Int32Type());
-static const TypePtr String(new StringType());
-static const TypePtr Double(new DoubleType());
-static const TypePtr Bool(new BooleanType());
-
-static const std::vector<TypePtr> kJsonTypes = {Null, Int32, String, Double, Bool};
-TypePtr JSONScalar::dense_type = TypePtr(new DenseUnionType(kJsonTypes));
-TypePtr JSONScalar::sparse_type = TypePtr(new SparseUnionType(kJsonTypes));
-
-}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h
deleted file mode 100644
index 9de961f..0000000
--- a/cpp/src/arrow/types/json.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_JSON_H
-#define ARROW_TYPES_JSON_H
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-struct JSONScalar : public DataType {
-  bool dense;
-
-  static TypePtr dense_type;
-  static TypePtr sparse_type;
-
-  explicit JSONScalar(bool dense = true) : DataType(Type::JSON_SCALAR), dense(dense) {}
-};
-
-}  // namespace arrow
-
-#endif  // ARROW_TYPES_JSON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 2e41b4a..12c5394 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -76,7 +76,11 @@ class TestListBuilder : public TestBuilder {
     builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
   }
 
-  void Done() { result_ = std::dynamic_pointer_cast<ListArray>(builder_->Finish()); }
+  void Done() {
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+    result_ = std::dynamic_pointer_cast<ListArray>(out);
+  }
 
  protected:
   TypePtr value_type_;
@@ -98,14 +102,17 @@ TEST_F(TestListBuilder, Equality) {
   // setup two equal arrays
   ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
   ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
-  array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&array));
   ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
   ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
-  equal_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&equal_array));
   // now an unequal one
   ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size()));
   ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size()));
-  unequal_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&unequal_array));
 
   // Test array equality
   EXPECT_TRUE(array->Equals(array));

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 6334054..ef2ec22 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -25,7 +25,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
   if (null_count_ != other.null_count_) { return false; }
 
   bool equal_offsets =
-      offset_buf_->Equals(*other.offset_buf_, (length_ + 1) * sizeof(int32_t));
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
   if (!equal_offsets) { return false; }
   bool equal_null_bitmap = true;
   if (null_count_ > 0) {
@@ -72,10 +72,10 @@ bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st
 
 Status ListArray::Validate() const {
   if (length_ < 0) { return Status::Invalid("Length was negative"); }
-  if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); }
-  if (offset_buf_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
+  if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); }
+  if (offset_buffer_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
     std::stringstream ss;
-    ss << "offset buffer size (bytes): " << offset_buf_->size()
+    ss << "offset buffer size (bytes): " << offset_buffer_->size()
        << " isn't large enough for length: " << length_;
     return Status::Invalid(ss.str());
   }
@@ -121,4 +121,38 @@ Status ListArray::Validate() const {
   return Status::OK();
 }
 
+Status ListBuilder::Init(int32_t elements) {
+  DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
+  RETURN_NOT_OK(ArrayBuilder::Init(elements));
+  // one more then requested for offsets
+  return offset_builder_.Resize((elements + 1) * sizeof(int32_t));
+}
+
+Status ListBuilder::Resize(int32_t capacity) {
+  DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
+  // one more then requested for offsets
+  RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t)));
+  return ArrayBuilder::Resize(capacity);
+}
+
+Status ListBuilder::Finish(std::shared_ptr<Array>* out) {
+  std::shared_ptr<Array> items = values_;
+  if (!items) { RETURN_NOT_OK(value_builder_->Finish(&items)); }
+
+  RETURN_NOT_OK(offset_builder_.Append<int32_t>(items->length()));
+  std::shared_ptr<Buffer> offsets = offset_builder_.Finish();
+
+  *out = std::make_shared<ListArray>(
+      type_, length_, offsets, items, null_count_, null_bitmap_);
+
+  Reset();
+
+  return Status::OK();
+}
+
+void ListBuilder::Reset() {
+  capacity_ = length_ = null_count_ = 0;
+  null_bitmap_ = nullptr;
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index f389451..9440ffe 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -43,9 +43,9 @@ class ARROW_EXPORT ListArray : public Array {
       const ArrayPtr& values, int32_t null_count = 0,
       std::shared_ptr<Buffer> null_bitmap = nullptr)
       : Array(type, length, null_count, null_bitmap) {
-    offset_buf_ = offsets;
-    offsets_ = offsets == nullptr ? nullptr
-                                  : reinterpret_cast<const int32_t*>(offset_buf_->data());
+    offset_buffer_ = offsets;
+    offsets_ = offsets == nullptr ? nullptr : reinterpret_cast<const int32_t*>(
+                                                  offset_buffer_->data());
     values_ = values;
   }
 
@@ -57,7 +57,7 @@ class ARROW_EXPORT ListArray : public Array {
   // with this array.
   const std::shared_ptr<Array>& values() const { return values_; }
   const std::shared_ptr<Buffer> offset_buffer() const {
-    return std::static_pointer_cast<Buffer>(offset_buf_);
+    return std::static_pointer_cast<Buffer>(offset_buffer_);
   }
 
   const std::shared_ptr<DataType>& value_type() const { return values_->type(); }
@@ -77,7 +77,7 @@ class ARROW_EXPORT ListArray : public Array {
       const ArrayPtr& arr) const override;
 
  protected:
-  std::shared_ptr<Buffer> offset_buf_;
+  std::shared_ptr<Buffer> offset_buffer_;
   const int32_t* offsets_;
   ArrayPtr values_;
 };
@@ -119,19 +119,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
 
   virtual ~ListBuilder() {}
 
-  Status Init(int32_t elements) override {
-    DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
-    RETURN_NOT_OK(ArrayBuilder::Init(elements));
-    // one more then requested for offsets
-    return offset_builder_.Resize((elements + 1) * sizeof(int32_t));
-  }
-
-  Status Resize(int32_t capacity) override {
-    DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
-    // one more then requested for offsets
-    RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t)));
-    return ArrayBuilder::Resize(capacity);
-  }
+  Status Init(int32_t elements) override;
+  Status Resize(int32_t capacity) override;
+  Status Finish(std::shared_ptr<Array>* out) override;
 
   // Vector append
   //
@@ -145,27 +135,6 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  // The same as Finalize but allows for overridding the c++ type
-  template <typename Container>
-  std::shared_ptr<Array> Transfer() {
-    std::shared_ptr<Array> items = values_;
-    if (!items) { items = value_builder_->Finish(); }
-
-    offset_builder_.Append<int32_t>(items->length());
-
-    const auto offsets_buffer = offset_builder_.Finish();
-    auto result = std::make_shared<Container>(
-        type_, length_, offsets_buffer, items, null_count_, null_bitmap_);
-
-    // TODO(emkornfield) make a reset method
-    capacity_ = length_ = null_count_ = 0;
-    null_bitmap_ = nullptr;
-
-    return result;
-  }
-
-  std::shared_ptr<Array> Finish() override { return Transfer<ListArray>(); }
-
   // Start a new variable-length list slot
   //
   // This function should be called before beginning to append elements to the
@@ -188,6 +157,8 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
   BufferBuilder offset_builder_;
   std::shared_ptr<ArrayBuilder> value_builder_;
   std::shared_ptr<Array> values_;
+
+  void Reset();
 };
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 5ac2867..121bd47 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -123,8 +123,11 @@ class TestPrimitiveBuilder : public TestBuilder {
 
     auto expected =
         std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_null_bitmap);
-    std::shared_ptr<ArrayType> result =
-        std::dynamic_pointer_cast<ArrayType>(builder->Finish());
+
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder->Finish(&out));
+
+    std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
 
     // Builder is now reset
     ASSERT_EQ(0, builder->length());
@@ -216,8 +219,10 @@ void TestPrimitiveBuilder<PBoolean>::Check(
 
   auto expected =
       std::make_shared<BooleanArray>(size, ex_data, ex_null_count, ex_null_bitmap);
-  std::shared_ptr<BooleanArray> result =
-      std::dynamic_pointer_cast<BooleanArray>(builder->Finish());
+
+  std::shared_ptr<Array> out;
+  ASSERT_OK(builder->Finish(&out));
+  std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
 
   // Builder is now reset
   ASSERT_EQ(0, builder->length());
@@ -254,7 +259,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {
   int n = 1000;
   ASSERT_OK(this->builder_->Reserve(n));
   ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
-  ASSERT_EQ(util::next_power2(type_traits<Type>::bytes_required(n)),
+  ASSERT_EQ(util::next_power2(TypeTraits<Type>::bytes_required(n)),
       this->builder_->data()->size());
 
   // unsure if this should go in all builder classes
@@ -267,7 +272,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
     ASSERT_OK(this->builder_->AppendNull());
   }
 
-  auto result = this->builder_->Finish();
+  std::shared_ptr<Array> result;
+  ASSERT_OK(this->builder_->Finish(&result));
 
   for (int i = 0; i < size; ++i) {
     ASSERT_TRUE(result->IsNull(i)) << i;
@@ -298,7 +304,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
   }
 
   do {
-    std::shared_ptr<Array> result = this->builder_->Finish();
+    std::shared_ptr<Array> result;
+    ASSERT_OK(this->builder_->Finish(&result));
   } while (false);
 
   ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
@@ -315,8 +322,7 @@ Status MakeArray(const vector<uint8_t>& valid_bytes, const vector<T>& draws, int
       RETURN_NOT_OK(builder->AppendNull());
     }
   }
-  *out = builder->Finish();
-  return Status::OK();
+  return builder->Finish(out);
 }
 
 TYPED_TEST(TestPrimitiveBuilder, Equality) {
@@ -465,7 +471,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
   ASSERT_OK(this->builder_->Reserve(cap));
   ASSERT_EQ(cap, this->builder_->capacity());
 
-  ASSERT_EQ(type_traits<Type>::bytes_required(cap), this->builder_->data()->size());
+  ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
   ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size());
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index 9ba2ebd..3a05ccf 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -69,12 +69,25 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
   return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
 }
 
+template class NumericArray<UInt8Type>;
+template class NumericArray<UInt16Type>;
+template class NumericArray<UInt32Type>;
+template class NumericArray<UInt64Type>;
+template class NumericArray<Int8Type>;
+template class NumericArray<Int16Type>;
+template class NumericArray<Int32Type>;
+template class NumericArray<Int64Type>;
+template class NumericArray<TimestampType>;
+template class NumericArray<FloatType>;
+template class NumericArray<DoubleType>;
+template class NumericArray<BooleanType>;
+
 template <typename T>
 Status PrimitiveBuilder<T>::Init(int32_t capacity) {
   RETURN_NOT_OK(ArrayBuilder::Init(capacity));
   data_ = std::make_shared<PoolBuffer>(pool_);
 
-  int64_t nbytes = type_traits<T>::bytes_required(capacity);
+  int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
   RETURN_NOT_OK(data_->Resize(nbytes));
   // TODO(emkornfield) valgrind complains without this
   memset(data_->mutable_data(), 0, nbytes);
@@ -93,10 +106,9 @@ Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
   } else {
     RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
     const int64_t old_bytes = data_->size();
-    const int64_t new_bytes = type_traits<T>::bytes_required(capacity);
+    const int64_t new_bytes = TypeTraits<T>::bytes_required(capacity);
     RETURN_NOT_OK(data_->Resize(new_bytes));
     raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
-
     memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
   }
   return Status::OK();
@@ -108,7 +120,7 @@ Status PrimitiveBuilder<T>::Append(
   RETURN_NOT_OK(Reserve(length));
 
   if (length > 0) {
-    memcpy(raw_data_ + length_, values, type_traits<T>::bytes_required(length));
+    memcpy(raw_data_ + length_, values, TypeTraits<T>::bytes_required(length));
   }
 
   // length_ is update by these
@@ -118,13 +130,18 @@ Status PrimitiveBuilder<T>::Append(
 }
 
 template <typename T>
-std::shared_ptr<Array> PrimitiveBuilder<T>::Finish() {
-  std::shared_ptr<Array> result = std::make_shared<typename type_traits<T>::ArrayType>(
+Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* out) {
+  const int64_t bytes_required = TypeTraits<T>::bytes_required(length_);
+  if (bytes_required > 0 && bytes_required < data_->size()) {
+    // Trim buffers
+    RETURN_NOT_OK(data_->Resize(bytes_required));
+  }
+  *out = std::make_shared<typename TypeTraits<T>::ArrayType>(
       type_, length_, data_, null_count_, null_bitmap_);
 
   data_ = null_bitmap_ = nullptr;
   capacity_ = length_ = null_count_ = 0;
-  return result;
+  return Status::OK();
 }
 
 template <>

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index c643783..f21470d 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -91,7 +91,9 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray {
   value_type Value(int i) const { return raw_data()[i]; }
 };
 
-#define NUMERIC_ARRAY_DECL(NAME, TypeClass) using NAME = NumericArray<TypeClass>;
+#define NUMERIC_ARRAY_DECL(NAME, TypeClass) \
+  using NAME = NumericArray<TypeClass>;     \
+  extern template class ARROW_EXPORT NumericArray<TypeClass>;
 
 NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type);
 NUMERIC_ARRAY_DECL(Int8Array, Int8Type);
@@ -139,8 +141,7 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
   Status Append(
       const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr);
 
-  std::shared_ptr<Array> Finish() override;
-
+  Status Finish(std::shared_ptr<Array>* out) override;
   Status Init(int32_t capacity) override;
 
   // Increase the capacity of the builder to accommodate at least the indicated
@@ -183,77 +184,77 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
 };
 
 template <>
-struct type_traits<UInt8Type> {
+struct TypeTraits<UInt8Type> {
   typedef UInt8Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements; }
 };
 
 template <>
-struct type_traits<Int8Type> {
+struct TypeTraits<Int8Type> {
   typedef Int8Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements; }
 };
 
 template <>
-struct type_traits<UInt16Type> {
+struct TypeTraits<UInt16Type> {
   typedef UInt16Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
 };
 
 template <>
-struct type_traits<Int16Type> {
+struct TypeTraits<Int16Type> {
   typedef Int16Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(int16_t); }
 };
 
 template <>
-struct type_traits<UInt32Type> {
+struct TypeTraits<UInt32Type> {
   typedef UInt32Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); }
 };
 
 template <>
-struct type_traits<Int32Type> {
+struct TypeTraits<Int32Type> {
   typedef Int32Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(int32_t); }
 };
 
 template <>
-struct type_traits<UInt64Type> {
+struct TypeTraits<UInt64Type> {
   typedef UInt64Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); }
 };
 
 template <>
-struct type_traits<Int64Type> {
+struct TypeTraits<Int64Type> {
   typedef Int64Array ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
 };
 
 template <>
-struct type_traits<TimestampType> {
+struct TypeTraits<TimestampType> {
   typedef TimestampArray ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
 };
 template <>
 
-struct type_traits<FloatType> {
+struct TypeTraits<FloatType> {
   typedef FloatArray ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(float); }
 };
 
 template <>
-struct type_traits<DoubleType> {
+struct TypeTraits<DoubleType> {
   typedef DoubleArray ArrayType;
 
   static inline int bytes_required(int elements) { return elements * sizeof(double); }
@@ -293,7 +294,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
 };
 
 template <>
-struct type_traits<BooleanType> {
+struct TypeTraits<BooleanType> {
   typedef BooleanArray ArrayType;
 
   static inline int bytes_required(int elements) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6807b00..d897e30 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -66,18 +66,13 @@ class TestStringContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    int nchars = chars_.size();
-
     value_buf_ = test::to_buffer(chars_);
-    values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
-
     offsets_buf_ = test::to_buffer(offsets_);
-
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);
 
     strings_ = std::make_shared<StringArray>(
-        length_, offsets_buf_, values_, null_count_, null_bitmap_);
+        length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
   }
 
  protected:
@@ -94,7 +89,6 @@ class TestStringContainer : public ::testing::Test {
   int null_count_;
   int length_;
 
-  ArrayPtr values_;
   std::shared_ptr<StringArray> strings_;
 };
 
@@ -122,7 +116,7 @@ TEST_F(TestStringContainer, TestListFunctions) {
 
 TEST_F(TestStringContainer, TestDestructor) {
   auto arr = std::make_shared<StringArray>(
-      length_, offsets_buf_, values_, null_count_, null_bitmap_);
+      length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
 }
 
 TEST_F(TestStringContainer, TestGetString) {
@@ -147,7 +141,10 @@ class TestStringBuilder : public TestBuilder {
   }
 
   void Done() {
-    result_ = std::dynamic_pointer_cast<StringArray>(builder_->Finish());
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<StringArray>(out);
     result_->Validate();
   }
 
@@ -178,7 +175,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
 
   ASSERT_EQ(reps * N, result_->length());
   ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->values()->length());
+  ASSERT_EQ(reps * 6, result_->data()->size());
 
   int32_t length;
   int32_t pos = 0;
@@ -218,18 +215,14 @@ class TestBinaryContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    int nchars = chars_.size();
-
     value_buf_ = test::to_buffer(chars_);
-    values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
-
     offsets_buf_ = test::to_buffer(offsets_);
 
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);
 
     strings_ = std::make_shared<BinaryArray>(
-        length_, offsets_buf_, values_, null_count_, null_bitmap_);
+        length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
   }
 
  protected:
@@ -246,7 +239,6 @@ class TestBinaryContainer : public ::testing::Test {
   int null_count_;
   int length_;
 
-  ArrayPtr values_;
   std::shared_ptr<BinaryArray> strings_;
 };
 
@@ -274,7 +266,7 @@ TEST_F(TestBinaryContainer, TestListFunctions) {
 
 TEST_F(TestBinaryContainer, TestDestructor) {
   auto arr = std::make_shared<BinaryArray>(
-      length_, offsets_buf_, values_, null_count_, null_bitmap_);
+      length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
 }
 
 TEST_F(TestBinaryContainer, TestGetValue) {
@@ -298,7 +290,10 @@ class TestBinaryBuilder : public TestBuilder {
   }
 
   void Done() {
-    result_ = std::dynamic_pointer_cast<BinaryArray>(builder_->Finish());
+    std::shared_ptr<Array> out;
+    EXPECT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<BinaryArray>(out);
     result_->Validate();
   }
 
@@ -330,7 +325,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
   ASSERT_OK(result_->Validate());
   ASSERT_EQ(reps * N, result_->length());
   ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->values()->length());
+  ASSERT_EQ(reps * 6, result_->data()->size());
 
   int32_t length;
   for (int i = 0; i < N * reps; ++i) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/string.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc
index 745ed8f..d692e13 100644
--- a/cpp/src/arrow/types/string.cc
+++ b/cpp/src/arrow/types/string.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/types/string.h"
 
+#include <cstring>
 #include <sstream>
 #include <string>
 
@@ -24,37 +25,77 @@
 
 namespace arrow {
 
-const std::shared_ptr<DataType> BINARY(new BinaryType());
-const std::shared_ptr<DataType> STRING(new StringType());
+static std::shared_ptr<DataType> kBinary = std::make_shared<BinaryType>();
+static std::shared_ptr<DataType> kString = std::make_shared<StringType>();
 
 BinaryArray::BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-    const ArrayPtr& values, int32_t null_count,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap)
-    : BinaryArray(BINARY, length, offsets, values, null_count, null_bitmap) {}
+    : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {}
 
 BinaryArray::BinaryArray(const TypePtr& type, int32_t length,
-    const std::shared_ptr<Buffer>& offsets, const ArrayPtr& values, int32_t null_count,
-    const std::shared_ptr<Buffer>& null_bitmap)
-    : ListArray(type, length, offsets, values, null_count, null_bitmap),
-      bytes_(std::dynamic_pointer_cast<UInt8Array>(values).get()),
-      raw_bytes_(bytes_->raw_data()) {
-  // Check in case the dynamic cast fails.
-  DCHECK(bytes_);
+    const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
+    : Array(type, length, null_count, null_bitmap),
+      offset_buffer_(offsets),
+      offsets_(reinterpret_cast<const int32_t*>(offset_buffer_->data())),
+      data_buffer_(data),
+      data_(nullptr) {
+  if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); }
 }
 
 Status BinaryArray::Validate() const {
-  if (values()->null_count() > 0) {
-    std::stringstream ss;
-    ss << type()->ToString() << " can have null values in the value array";
-    Status::Invalid(ss.str());
+  // TODO(wesm): what to do here?
+  return Status::OK();
+}
+
+bool BinaryArray::EqualsExact(const BinaryArray& other) const {
+  if (!Array::EqualsExact(other)) { return false; }
+
+  bool equal_offsets =
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
+  if (!equal_offsets) { return false; }
+
+  return data_buffer_->Equals(*other.data_buffer_, data_buffer_->size());
+}
+
+bool BinaryArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const BinaryArray*>(arr.get()));
+}
+
+bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  const auto other = static_cast<const BinaryArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != arr->IsNull(o_i)) { return false; }
+    if (is_null) continue;
+    const int32_t begin_offset = offset(i);
+    const int32_t end_offset = offset(i + 1);
+    const int32_t other_begin_offset = other->offset(o_i);
+    const int32_t other_end_offset = other->offset(o_i + 1);
+    // Underlying can't be equal if the size isn't equal
+    if (end_offset - begin_offset != other_end_offset - other_begin_offset) {
+      return false;
+    }
+
+    if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset,
+            end_offset - begin_offset)) {
+      return false;
+    }
   }
-  return ListArray::Validate();
+  return true;
 }
 
 StringArray::StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-    const ArrayPtr& values, int32_t null_count,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap)
-    : StringArray(STRING, length, offsets, values, null_count, null_bitmap) {}
+    : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {}
 
 Status StringArray::Validate() const {
   // TODO(emkornfield) Validate proper UTF8 code points?
@@ -72,4 +113,28 @@ BinaryBuilder::BinaryBuilder(MemoryPool* pool, const TypePtr& type)
   byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
 }
 
+Status BinaryBuilder::Finish(std::shared_ptr<Array>* out) {
+  std::shared_ptr<Array> result;
+  RETURN_NOT_OK(ListBuilder::Finish(&result));
+
+  const auto list = std::dynamic_pointer_cast<ListArray>(result);
+  auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
+
+  *out = std::make_shared<BinaryArray>(list->length(), list->offset_buffer(),
+      values->data(), list->null_count(), list->null_bitmap());
+  return Status::OK();
+}
+
+Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
+  std::shared_ptr<Array> result;
+  RETURN_NOT_OK(ListBuilder::Finish(&result));
+
+  const auto list = std::dynamic_pointer_cast<ListArray>(result);
+  auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
+
+  *out = std::make_shared<StringArray>(list->length(), list->offset_buffer(),
+      values->data(), list->null_count(), list->null_bitmap());
+  return Status::OK();
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index bab0c58..aaba49c 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -35,15 +35,16 @@ namespace arrow {
 class Buffer;
 class MemoryPool;
 
-class ARROW_EXPORT BinaryArray : public ListArray {
+class ARROW_EXPORT BinaryArray : public Array {
  public:
   BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values, int32_t null_count = 0,
+      const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
+
   // Constructor that allows sub-classes/builders to propagate there logical type up the
   // class hierarchy.
   BinaryArray(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values, int32_t null_count = 0,
+      const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
 
   // Return the pointer to the given elements bytes
@@ -53,28 +54,38 @@ class ARROW_EXPORT BinaryArray : public ListArray {
     DCHECK(out_length);
     const int32_t pos = offsets_[i];
     *out_length = offsets_[i + 1] - pos;
-    return raw_bytes_ + pos;
+    return data_ + pos;
   }
 
+  std::shared_ptr<Buffer> data() const { return data_buffer_; }
+  std::shared_ptr<Buffer> offsets() const { return offset_buffer_; }
+
+  int32_t offset(int i) const { return offsets_[i]; }
+
+  // Neither of these functions will perform boundschecking
+  int32_t value_offset(int i) const { return offsets_[i]; }
+  int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; }
+
+  bool EqualsExact(const BinaryArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+      const ArrayPtr& arr) const override;
+
   Status Validate() const override;
 
  private:
-  UInt8Array* bytes_;
-  const uint8_t* raw_bytes_;
+  std::shared_ptr<Buffer> offset_buffer_;
+  const int32_t* offsets_;
+
+  std::shared_ptr<Buffer> data_buffer_;
+  const uint8_t* data_;
 };
 
 class ARROW_EXPORT StringArray : public BinaryArray {
  public:
   StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values, int32_t null_count = 0,
+      const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
-  // Constructor that allows overriding the logical type, so subclasses can propagate
-  // there
-  // up the class hierarchy.
-  StringArray(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values, int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& null_bitmap = nullptr)
-      : BinaryArray(type, length, offsets, values, null_count, null_bitmap) {}
 
   // Construct a std::string
   // TODO: std::bad_alloc possibility
@@ -98,9 +109,7 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder {
     return byte_builder_->Append(value, length);
   }
 
-  std::shared_ptr<Array> Finish() override {
-    return ListBuilder::Transfer<BinaryArray>();
-  }
+  Status Finish(std::shared_ptr<Array>* out) override;
 
  protected:
   UInt8Builder* byte_builder_;
@@ -112,6 +121,8 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type)
       : BinaryBuilder(pool, type) {}
 
+  Status Finish(std::shared_ptr<Array>* out) override;
+
   Status Append(const std::string& value) { return Append(value.c_str(), value.size()); }
 
   Status Append(const char* value, int32_t length) {
@@ -119,10 +130,6 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   }
 
   Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);
-
-  std::shared_ptr<Array> Finish() override {
-    return ListBuilder::Transfer<StringArray>();
-  }
 };
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index ccf5a52..8e82c38 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -119,7 +119,11 @@ class TestStructBuilder : public TestBuilder {
     ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
   }
 
-  void Done() { result_ = std::dynamic_pointer_cast<StructArray>(builder_->Finish()); }
+  void Done() {
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder_->Finish(&out));
+    result_ = std::dynamic_pointer_cast<StructArray>(out);
+  }
 
  protected:
   std::vector<FieldPtr> value_fields_;
@@ -294,7 +298,8 @@ TEST_F(TestStructBuilder, TestEquality) {
   for (int32_t value : int_values) {
     int_vb->UnsafeAppend(value);
   }
-  array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&array));
 
   ASSERT_OK(builder_->Resize(list_lengths.size()));
   ASSERT_OK(char_vb->Resize(list_values.size()));
@@ -308,7 +313,8 @@ TEST_F(TestStructBuilder, TestEquality) {
   for (int32_t value : int_values) {
     int_vb->UnsafeAppend(value);
   }
-  equal_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&equal_array));
 
   ASSERT_OK(builder_->Resize(list_lengths.size()));
   ASSERT_OK(char_vb->Resize(list_values.size()));
@@ -323,7 +329,8 @@ TEST_F(TestStructBuilder, TestEquality) {
   for (int32_t value : int_values) {
     int_vb->UnsafeAppend(value);
   }
-  unequal_bitmap_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&unequal_bitmap_array));
 
   ASSERT_OK(builder_->Resize(list_lengths.size()));
   ASSERT_OK(char_vb->Resize(list_values.size()));
@@ -339,7 +346,8 @@ TEST_F(TestStructBuilder, TestEquality) {
   for (int32_t value : int_values) {
     int_vb->UnsafeAppend(value);
   }
-  unequal_offsets_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&unequal_offsets_array));
 
   ASSERT_OK(builder_->Resize(list_lengths.size()));
   ASSERT_OK(char_vb->Resize(list_values.size()));
@@ -354,7 +362,8 @@ TEST_F(TestStructBuilder, TestEquality) {
   for (int32_t value : unequal_int_values) {
     int_vb->UnsafeAppend(value);
   }
-  unequal_values_array = builder_->Finish();
+
+  ASSERT_OK(builder_->Finish(&unequal_values_array));
 
   // Test array equality
   EXPECT_TRUE(array->Equals(array));

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index e8176f0..369c29d 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -87,4 +87,18 @@ Status StructArray::Validate() const {
   return Status::OK();
 }
 
+Status StructBuilder::Finish(std::shared_ptr<Array>* out) {
+  std::vector<std::shared_ptr<Array>> fields(field_builders_.size());
+  for (size_t i = 0; i < field_builders_.size(); ++i) {
+    RETURN_NOT_OK(field_builders_[i]->Finish(&fields[i]));
+  }
+
+  *out = std::make_shared<StructArray>(type_, length_, fields, null_count_, null_bitmap_);
+
+  null_bitmap_ = nullptr;
+  capacity_ = length_ = null_count_ = 0;
+
+  return Status::OK();
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index 63955eb..65b8daf 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -73,6 +73,8 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
     field_builders_ = field_builders;
   }
 
+  Status Finish(std::shared_ptr<Array>* out) override;
+
   // Null bitmap is of equal length to every child field, and any zero byte
   // will be considered as a null for that field, but users must using app-
   // end methods or advance methods of the child builders' independently to
@@ -83,21 +85,6 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  std::shared_ptr<Array> Finish() override {
-    std::vector<ArrayPtr> fields;
-    for (auto it : field_builders_) {
-      fields.push_back(it->Finish());
-    }
-
-    auto result =
-        std::make_shared<StructArray>(type_, length_, fields, null_count_, null_bitmap_);
-
-    null_bitmap_ = nullptr;
-    capacity_ = length_ = null_count_ = 0;
-
-    return result;
-  }
-
   // Append an element to the Struct. All child-builders' Append method must
   // be called independently to maintain data-structure consistency.
   Status Append(bool is_valid = true) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index 8dd07d0..08e9ae3 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -49,12 +49,18 @@ std::string Status::CodeAsString() const {
     case StatusCode::KeyError:
       type = "Key error";
       break;
+    case StatusCode::TypeError:
+      type = "Type error";
+      break;
     case StatusCode::Invalid:
       type = "Invalid";
       break;
     case StatusCode::IOError:
       type = "IOError";
       break;
+    case StatusCode::UnknownError:
+      type = "Unknown error";
+      break;
     case StatusCode::NotImplemented:
       type = "NotImplemented";
       break;

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/cpp/src/arrow/util/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h
index d558531..05f5b74 100644
--- a/cpp/src/arrow/util/status.h
+++ b/cpp/src/arrow/util/status.h
@@ -78,9 +78,10 @@ enum class StatusCode : char {
   OK = 0,
   OutOfMemory = 1,
   KeyError = 2,
-  Invalid = 3,
-  IOError = 4,
-
+  TypeError = 3,
+  Invalid = 4,
+  IOError = 5,
+  UnknownError = 9,
   NotImplemented = 10,
 };
 
@@ -106,6 +107,14 @@ class ARROW_EXPORT Status {
     return Status(StatusCode::KeyError, msg, -1);
   }
 
+  static Status TypeError(const std::string& msg) {
+    return Status(StatusCode::TypeError, msg, -1);
+  }
+
+  static Status UnknownError(const std::string& msg) {
+    return Status(StatusCode::UnknownError, msg, -1);
+  }
+
   static Status NotImplemented(const std::string& msg) {
     return Status(StatusCode::NotImplemented, msg, -1);
   }
@@ -125,6 +134,8 @@ class ARROW_EXPORT Status {
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
   bool IsIOError() const { return code() == StatusCode::IOError; }
+
+  bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
   bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
 
   // Return a string representation of this status suitable for printing.

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 55f6d05..4357fa0 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -438,8 +438,6 @@ set(PYARROW_SRCS
   src/pyarrow/config.cc
   src/pyarrow/helpers.cc
   src/pyarrow/io.cc
-  src/pyarrow/status.cc
-
   src/pyarrow/adapters/builtin.cc
   src/pyarrow/adapters/pandas.cc
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/doc/index.rst
----------------------------------------------------------------------
diff --git a/python/doc/index.rst b/python/doc/index.rst
index 550e544..88725ba 100644
--- a/python/doc/index.rst
+++ b/python/doc/index.rst
@@ -1,3 +1,20 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
 Apache Arrow (Python)
 =====================
 
@@ -25,4 +42,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd
index 891d1ac..4fb46c2 100644
--- a/python/pyarrow/error.pxd
+++ b/python/pyarrow/error.pxd
@@ -16,7 +16,5 @@
 # under the License.
 
 from pyarrow.includes.libarrow cimport CStatus
-from pyarrow.includes.pyarrow cimport PyStatus
 
-cdef int check_cstatus(const CStatus& status) nogil except -1
-cdef int check_status(const PyStatus& status) nogil except -1
+cdef int check_status(const CStatus& status) nogil except -1

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
index a2c53fe..b8a82b3 100644
--- a/python/pyarrow/error.pyx
+++ b/python/pyarrow/error.pyx
@@ -22,15 +22,7 @@ from pyarrow.compat import frombytes
 class ArrowException(Exception):
     pass
 
-cdef int check_cstatus(const CStatus& status) nogil except -1:
-    if status.ok():
-        return 0
-
-    cdef c_string c_message = status.ToString()
-    with gil:
-        raise ArrowException(frombytes(c_message))
-
-cdef int check_status(const PyStatus& status) nogil except -1:
+cdef int check_status(const CStatus& status) nogil except -1:
     if status.ok():
         return 0
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 7c47f21..e1da191 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -25,36 +25,19 @@ cimport pyarrow.includes.libarrow_io as arrow_io
 
 
 cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
-    # We can later add more of the common status factory methods as needed
-    cdef PyStatus PyStatus_OK "Status::OK"()
-
-    cdef cppclass PyStatus "pyarrow::Status":
-        PyStatus()
-
-        c_string ToString()
-
-        c_bool ok()
-        c_bool IsOutOfMemory()
-        c_bool IsKeyError()
-        c_bool IsTypeError()
-        c_bool IsIOError()
-        c_bool IsValueError()
-        c_bool IsNotImplemented()
-        c_bool IsArrowError()
-
     shared_ptr[CDataType] GetPrimitiveType(Type type)
-    PyStatus ConvertPySequence(object obj, shared_ptr[CArray]* out)
+    CStatus ConvertPySequence(object obj, shared_ptr[CArray]* out)
 
-    PyStatus PandasToArrow(MemoryPool* pool, object ao,
-                           shared_ptr[CArray]* out)
-    PyStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo,
-                                 shared_ptr[CArray]* out)
+    CStatus PandasToArrow(MemoryPool* pool, object ao,
+                          shared_ptr[CArray]* out)
+    CStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo,
+                                shared_ptr[CArray]* out)
 
-    PyStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr,
-                                  object py_ref, PyObject** out)
+    CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr,
+                                 object py_ref, PyObject** out)
 
-    PyStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr,
-                                   object py_ref, PyObject** out)
+    CStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr,
+                                  object py_ref, PyObject** out)
 
     MemoryPool* get_memory_pool()
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 8970e06..16ebfa1 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -28,7 +28,7 @@ cimport pyarrow.includes.pyarrow as pyarrow
 from pyarrow.includes.libarrow_io cimport *
 
 from pyarrow.compat import frombytes, tobytes
-from pyarrow.error cimport check_cstatus
+from pyarrow.error cimport check_status
 
 cimport cpython as cp
 
@@ -57,9 +57,9 @@ cdef class NativeFile:
         if self.is_open:
             with nogil:
                 if self.is_readonly:
-                    check_cstatus(self.rd_file.get().Close())
+                    check_status(self.rd_file.get().Close())
                 else:
-                    check_cstatus(self.wr_file.get().Close())
+                    check_status(self.wr_file.get().Close())
         self.is_open = False
 
     cdef read_handle(self, shared_ptr[ReadableFileInterface]* file):
@@ -88,22 +88,22 @@ cdef class NativeFile:
         cdef int64_t size
         self._assert_readable()
         with nogil:
-            check_cstatus(self.rd_file.get().GetSize(&size))
+            check_status(self.rd_file.get().GetSize(&size))
         return size
 
     def tell(self):
         cdef int64_t position
         with nogil:
             if self.is_readonly:
-                check_cstatus(self.rd_file.get().Tell(&position))
+                check_status(self.rd_file.get().Tell(&position))
             else:
-                check_cstatus(self.wr_file.get().Tell(&position))
+                check_status(self.wr_file.get().Tell(&position))
         return position
 
     def seek(self, int64_t position):
         self._assert_readable()
         with nogil:
-            check_cstatus(self.rd_file.get().Seek(position))
+            check_status(self.rd_file.get().Seek(position))
 
     def write(self, data):
         """
@@ -116,7 +116,7 @@ cdef class NativeFile:
         cdef const uint8_t* buf = <const uint8_t*> cp.PyBytes_AS_STRING(data)
         cdef int64_t bufsize = len(data)
         with nogil:
-            check_cstatus(self.wr_file.get().Write(buf, bufsize))
+            check_status(self.wr_file.get().Write(buf, bufsize))
 
     def read(self, int nbytes):
         cdef:
@@ -127,8 +127,7 @@ cdef class NativeFile:
         self._assert_readable()
 
         with nogil:
-            check_cstatus(self.rd_file.get()
-                          .ReadB(nbytes, &out))
+            check_status(self.rd_file.get().ReadB(nbytes, &out))
 
         result = cp.PyBytes_FromStringAndSize(
             <const char*>out.get().data(), out.get().size())
@@ -223,7 +222,7 @@ cdef class InMemoryOutputStream(NativeFile):
     def get_result(self):
         cdef Buffer result = Buffer()
 
-        check_cstatus(self.wr_file.get().Close())
+        check_status(self.wr_file.get().Close())
         result.init(<shared_ptr[CBuffer]> self.buffer)
 
         self.is_open = False
@@ -270,7 +269,7 @@ except ImportError:
 
 def have_libhdfs():
     try:
-        check_cstatus(ConnectLibHdfs())
+        check_status(ConnectLibHdfs())
         return True
     except:
         return False
@@ -304,7 +303,7 @@ cdef class HdfsClient:
     def close(self):
         self._ensure_client()
         with nogil:
-            check_cstatus(self.client.get().Disconnect())
+            check_status(self.client.get().Disconnect())
         self.is_open = False
 
     cdef _ensure_client(self):
@@ -341,8 +340,7 @@ cdef class HdfsClient:
         conf.user = tobytes(user)
 
         with nogil:
-            check_cstatus(
-                CHdfsClient.Connect(&conf, &out.client))
+            check_status(CHdfsClient.Connect(&conf, &out.client))
         out.is_open = True
 
         return out
@@ -383,8 +381,8 @@ cdef class HdfsClient:
         self._ensure_client()
 
         with nogil:
-            check_cstatus(self.client.get()
-                          .ListDirectory(c_path, &listing))
+            check_status(self.client.get()
+                         .ListDirectory(c_path, &listing))
 
         cdef const HdfsPathInfo* info
         for i in range(<int> listing.size()):
@@ -422,8 +420,8 @@ cdef class HdfsClient:
 
         cdef c_string c_path = tobytes(path)
         with nogil:
-            check_cstatus(self.client.get()
-                          .CreateDirectory(c_path))
+            check_status(self.client.get()
+                         .CreateDirectory(c_path))
 
     def delete(self, path, bint recursive=False):
         """
@@ -439,8 +437,8 @@ cdef class HdfsClient:
 
         cdef c_string c_path = tobytes(path)
         with nogil:
-            check_cstatus(self.client.get()
-                          .Delete(c_path, recursive))
+            check_status(self.client.get()
+                         .Delete(c_path, recursive))
 
     def open(self, path, mode='rb', buffer_size=None, replication=None,
              default_block_size=None):
@@ -473,7 +471,7 @@ cdef class HdfsClient:
                 append = True
 
             with nogil:
-                check_cstatus(
+                check_status(
                     self.client.get()
                     .OpenWriteable(c_path, append, c_buffer_size,
                                    c_replication, c_default_block_size,
@@ -484,8 +482,8 @@ cdef class HdfsClient:
             out.is_readonly = False
         else:
             with nogil:
-                check_cstatus(self.client.get()
-                              .OpenReadable(c_path, &rd_handle))
+                check_status(self.client.get()
+                             .OpenReadable(c_path, &rd_handle))
 
             out.rd_file = <shared_ptr[ReadableFileInterface]> rd_handle
             out.is_readonly = True
@@ -579,9 +577,9 @@ cdef class HdfsFile(NativeFile):
         try:
             with nogil:
                 while total_bytes < nbytes:
-                    check_cstatus(self.rd_file.get()
-                                  .Read(rpc_chunksize, &bytes_read,
-                                        buf + total_bytes))
+                    check_status(self.rd_file.get()
+                                 .Read(rpc_chunksize, &bytes_read,
+                                       buf + total_bytes))
 
                     total_bytes += bytes_read
 
@@ -647,8 +645,8 @@ cdef class HdfsFile(NativeFile):
         try:
             while True:
                 with nogil:
-                    check_cstatus(self.rd_file.get()
-                                  .Read(self.buffer_size, &bytes_read, buf))
+                    check_status(self.rd_file.get()
+                                 .Read(self.buffer_size, &bytes_read, buf))
 
                 total_bytes += bytes_read
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/ipc.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/ipc.pyx b/python/pyarrow/ipc.pyx
index f8da3a7..46deb5a 100644
--- a/python/pyarrow/ipc.pyx
+++ b/python/pyarrow/ipc.pyx
@@ -26,7 +26,7 @@ from pyarrow.includes.libarrow_io cimport *
 from pyarrow.includes.libarrow_ipc cimport *
 cimport pyarrow.includes.pyarrow as pyarrow
 
-from pyarrow.error cimport check_cstatus
+from pyarrow.error cimport check_status
 from pyarrow.io cimport NativeFile
 from pyarrow.schema cimport Schema
 from pyarrow.table cimport RecordBatch
@@ -89,8 +89,8 @@ cdef class ArrowFileWriter:
         get_writer(sink, &self.sink)
 
         with nogil:
-            check_cstatus(CFileWriter.Open(self.sink.get(), schema.sp_schema,
-                                           &self.writer))
+            check_status(CFileWriter.Open(self.sink.get(), schema.sp_schema,
+                                          &self.writer))
 
         self.closed = False
 
@@ -101,12 +101,12 @@ cdef class ArrowFileWriter:
     def write_record_batch(self, RecordBatch batch):
         cdef CRecordBatch* bptr = batch.batch
         with nogil:
-            check_cstatus(self.writer.get()
-                          .WriteRecordBatch(bptr.columns(), bptr.num_rows()))
+            check_status(self.writer.get()
+                         .WriteRecordBatch(bptr.columns(), bptr.num_rows()))
 
     def close(self):
         with nogil:
-            check_cstatus(self.writer.get().Close())
+            check_status(self.writer.get().Close())
         self.closed = True
 
 
@@ -124,9 +124,9 @@ cdef class ArrowFileReader:
 
         with nogil:
             if offset != 0:
-                check_cstatus(CFileReader.Open2(reader, offset, &self.reader))
+                check_status(CFileReader.Open2(reader, offset, &self.reader))
             else:
-                check_cstatus(CFileReader.Open(reader, &self.reader))
+                check_status(CFileReader.Open(reader, &self.reader))
 
     property num_dictionaries:
 
@@ -147,7 +147,7 @@ cdef class ArrowFileReader:
             raise ValueError('Batch number {0} out of range'.format(i))
 
         with nogil:
-            check_cstatus(self.reader.get().GetRecordBatch(i, &batch))
+            check_status(self.reader.get().GetRecordBatch(i, &batch))
 
         result = RecordBatch()
         result.init(batch)

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
index 2abe57b..019dd2c 100644
--- a/python/pyarrow/parquet.pyx
+++ b/python/pyarrow/parquet.pyx
@@ -26,7 +26,7 @@ cimport pyarrow.includes.pyarrow as pyarrow
 
 from pyarrow.compat import tobytes
 from pyarrow.error import ArrowException
-from pyarrow.error cimport check_cstatus
+from pyarrow.error cimport check_status
 from pyarrow.io import NativeFile
 from pyarrow.table cimport Table
 
@@ -62,7 +62,7 @@ cdef class ParquetReader:
         cdef shared_ptr[ReadableFileInterface] cpp_handle
         file.read_handle(&cpp_handle)
 
-        check_cstatus(OpenFile(cpp_handle, &self.allocator, &self.reader))
+        check_status(OpenFile(cpp_handle, &self.allocator, &self.reader))
 
     def read_all(self):
         cdef:
@@ -70,8 +70,8 @@ cdef class ParquetReader:
             shared_ptr[CTable] ctable
 
         with nogil:
-            check_cstatus(self.reader.get()
-                          .ReadFlatTable(&ctable))
+            check_status(self.reader.get()
+                         .ReadFlatTable(&ctable))
 
         table.init(ctable)
         return table
@@ -80,7 +80,7 @@ cdef class ParquetReader:
 def read_table(source, columns=None):
     """
     Read a Table from Parquet format
-    
+
     Returns
     -------
     pyarrow.table.Table
@@ -176,5 +176,5 @@ def write_table(table, filename, chunk_size=None, version=None,
 
     sink.reset(new LocalFileOutputStream(tobytes(filename)))
     with nogil:
-        check_cstatus(WriteFlatTable(ctable_, default_memory_pool(), sink,
-            chunk_size_, properties_builder.build()))
+        check_status(WriteFlatTable(ctable_, default_memory_pool(), sink,
+                                    chunk_size_, properties_builder.build()))

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index 680f3a5..c034fbd 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -20,13 +20,14 @@
 
 #include "pyarrow/adapters/builtin.h"
 
-#include <arrow/api.h>
+#include "arrow/api.h"
+#include "arrow/util/status.h"
 
 #include "pyarrow/helpers.h"
-#include "pyarrow/status.h"
 
 using arrow::ArrayBuilder;
 using arrow::DataType;
+using arrow::Status;
 using arrow::Type;
 
 namespace pyarrow {
@@ -129,7 +130,7 @@ class SeqVisitor {
       PyObject* item = item_ref.obj();
 
       if (PyList_Check(item)) {
-        PY_RETURN_NOT_OK(Visit(item, level + 1));
+        RETURN_NOT_OK(Visit(item, level + 1));
       } else if (PyDict_Check(item)) {
         return Status::NotImplemented("No type inference for dicts");
       } else {
@@ -164,9 +165,9 @@ class SeqVisitor {
   Status Validate() const {
     if (scalars_.total_count() > 0) {
       if (num_nesting_levels() > 1) {
-        return Status::ValueError("Mixed nesting levels not supported");
+        return Status::Invalid("Mixed nesting levels not supported");
       } else if (max_observed_level() < max_nesting_level_) {
-        return Status::ValueError("Mixed nesting levels not supported");
+        return Status::Invalid("Mixed nesting levels not supported");
       }
     }
     return Status::OK();
@@ -216,8 +217,8 @@ static Status InferArrowType(PyObject* obj, int64_t* size,
   }
 
   SeqVisitor seq_visitor;
-  PY_RETURN_NOT_OK(seq_visitor.Visit(obj));
-  PY_RETURN_NOT_OK(seq_visitor.Validate());
+  RETURN_NOT_OK(seq_visitor.Visit(obj));
+  RETURN_NOT_OK(seq_visitor.Validate());
 
   *out_type = seq_visitor.GetType();
 
@@ -259,7 +260,7 @@ class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
     Py_ssize_t size = PySequence_Size(seq);
-    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
@@ -281,7 +282,7 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {
   Status AppendData(PyObject* seq) override {
     int64_t val;
     Py_ssize_t size = PySequence_Size(seq);
-    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
@@ -301,7 +302,7 @@ class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
   Status AppendData(PyObject* seq) override {
     double val;
     Py_ssize_t size = PySequence_Size(seq);
-    RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
@@ -330,7 +331,7 @@ class StringConverter : public TypedConverter<arrow::StringBuilder> {
       OwnedRef holder(item);
 
       if (item == Py_None) {
-        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+        RETURN_NOT_OK(typed_builder_->AppendNull());
         continue;
       } else if (PyUnicode_Check(item)) {
         tmp.reset(PyUnicode_AsUTF8String(item));
@@ -344,7 +345,7 @@ class StringConverter : public TypedConverter<arrow::StringBuilder> {
       // No error checking
       length = PyBytes_GET_SIZE(bytes_obj);
       bytes = PyBytes_AS_STRING(bytes_obj);
-      RETURN_ARROW_NOT_OK(typed_builder_->Append(bytes, length));
+      RETURN_NOT_OK(typed_builder_->Append(bytes, length));
     }
     return Status::OK();
   }
@@ -359,10 +360,10 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
-        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+        RETURN_NOT_OK(typed_builder_->AppendNull());
       } else {
         typed_builder_->Append();
-        PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
+        RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
       }
     }
     return Status::OK();
@@ -408,7 +409,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
 Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
   std::shared_ptr<DataType> type;
   int64_t size;
-  PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type));
+  RETURN_NOT_OK(InferArrowType(obj, &size, &type));
 
   // Handle NA / NullType case
   if (type->type == Type::NA) {
@@ -426,14 +427,12 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
 
   // Give the sequence converter an array builder
   std::shared_ptr<ArrayBuilder> builder;
-  RETURN_ARROW_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder));
+  RETURN_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder));
   converter->Init(builder);
 
-  PY_RETURN_NOT_OK(converter->AppendData(obj));
+  RETURN_NOT_OK(converter->AppendData(obj));
 
-  *out = builder->Finish();
-
-  return Status::OK();
+  return builder->Finish(out);
 }
 
 } // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/src/pyarrow/adapters/builtin.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h
index 4e997e3..2ddfdaa 100644
--- a/python/src/pyarrow/adapters/builtin.h
+++ b/python/src/pyarrow/adapters/builtin.h
@@ -30,14 +30,15 @@
 #include "pyarrow/common.h"
 #include "pyarrow/visibility.h"
 
-namespace arrow { class Array; }
+namespace arrow {
+class Array;
+class Status;
+}
 
 namespace pyarrow {
 
-class Status;
-
 PYARROW_EXPORT
-Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out);
+arrow::Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out);
 
 } // namespace pyarrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/732a2059/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index b2fcd37..5902b83 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -31,10 +31,10 @@
 
 #include "arrow/api.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/status.h"
 
 #include "pyarrow/common.h"
 #include "pyarrow/config.h"
-#include "pyarrow/status.h"
 
 namespace pyarrow {
 
@@ -42,6 +42,8 @@ using arrow::Array;
 using arrow::Column;
 using arrow::Field;
 using arrow::DataType;
+using arrow::Status;
+
 namespace util = arrow::util;
 
 // ----------------------------------------------------------------------
@@ -149,7 +151,7 @@ class ArrowSerializer {
     int null_bytes = util::bytes_for_bits(length_);
 
     null_bitmap_ = std::make_shared<arrow::PoolBuffer>(pool_);
-    RETURN_ARROW_NOT_OK(null_bitmap_->Resize(null_bytes));
+    RETURN_NOT_OK(null_bitmap_->Resize(null_bytes));
 
     null_bitmap_data_ = null_bitmap_->mutable_data();
     memset(null_bitmap_data_, 0, null_bytes);
@@ -171,9 +173,9 @@ class ArrowSerializer {
     PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
     arrow::TypePtr string_type(new arrow::StringType());
     arrow::StringBuilder string_builder(pool_, string_type);
-    RETURN_ARROW_NOT_OK(string_builder.Resize(length_));
+    RETURN_NOT_OK(string_builder.Resize(length_));
 
-    arrow::Status s;
+    Status s;
     PyObject* obj;
     for (int64_t i = 0; i < length_; ++i) {
       obj = objects[i];
@@ -187,18 +189,16 @@ class ArrowSerializer {
         s = string_builder.Append(PyBytes_AS_STRING(obj), length);
         Py_DECREF(obj);
         if (!s.ok()) {
-          return Status::ArrowError(s.ToString());
+          return s;
         }
       } else if (PyBytes_Check(obj)) {
         const int32_t length = PyBytes_GET_SIZE(obj);
-        RETURN_ARROW_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length));
+        RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length));
       } else {
         string_builder.AppendNull();
       }
     }
-    *out = std::shared_ptr<arrow::Array>(string_builder.Finish());
-
-    return Status::OK();
+    return string_builder.Finish(out);
   }
 
   Status ConvertBooleans(std::shared_ptr<Array>* out) {
@@ -208,7 +208,7 @@ class ArrowSerializer {
 
     int nbytes = util::bytes_for_bits(length_);
     auto data = std::make_shared<arrow::PoolBuffer>(pool_);
-    RETURN_ARROW_NOT_OK(data->Resize(nbytes));
+    RETURN_NOT_OK(data->Resize(nbytes));
     uint8_t* bitmap = data->mutable_data();
     memset(bitmap, 0, nbytes);
 
@@ -305,7 +305,7 @@ inline Status ArrowSerializer<NPY_DATETIME>::MakeDataType(std::shared_ptr<DataTy
           unit = arrow::TimestampType::Unit::NANO;
           break;
       default:
-          return Status::ValueError("Unknown NumPy datetime unit");
+          return Status::Invalid("Unknown NumPy datetime unit");
   }
 
   out->reset(new arrow::TimestampType(unit));
@@ -330,7 +330,7 @@ inline Status ArrowSerializer<TYPE>::Convert(std::shared_ptr<Array>* out) {
   RETURN_NOT_OK(ConvertData());
   std::shared_ptr<DataType> type;
   RETURN_NOT_OK(MakeDataType(&type));
-  RETURN_ARROW_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out));
+  RETURN_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out));
   return Status::OK();
 }
 
@@ -389,7 +389,7 @@ template <int TYPE>
 inline Status ArrowSerializer<TYPE>::ConvertData() {
   // TODO(wesm): strided arrays
   if (is_strided()) {
-    return Status::ValueError("no support for strided data yet");
+    return Status::Invalid("no support for strided data yet");
   }
 
   data_ = std::make_shared<NumPyBuffer>(arr_);
@@ -399,12 +399,12 @@ inline Status ArrowSerializer<TYPE>::ConvertData() {
 template <>
 inline Status ArrowSerializer<NPY_BOOL>::ConvertData() {
   if (is_strided()) {
-    return Status::ValueError("no support for strided data yet");
+    return Status::Invalid("no support for strided data yet");
   }
 
   int nbytes = util::bytes_for_bits(length_);
   auto buffer = std::make_shared<arrow::PoolBuffer>(pool_);
-  RETURN_ARROW_NOT_OK(buffer->Resize(nbytes));
+  RETURN_NOT_OK(buffer->Resize(nbytes));
 
   const uint8_t* values = reinterpret_cast<const uint8_t*>(PyArray_DATA(arr_));
 
@@ -446,7 +446,7 @@ Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo,
   }
 
   if (PyArray_NDIM(arr) != 1) {
-    return Status::ValueError("only handle 1-dimensional arrays");
+    return Status::Invalid("only handle 1-dimensional arrays");
   }
 
   switch(PyArray_DESCR(arr)->type_num) {


Mime
View raw message