arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [2/2] arrow git commit: ARROW-1183: [Python] Implement pandas conversions between Time32, Time64 types and datetime.time
Date Sun, 16 Jul 2017 21:29:50 GMT
ARROW-1183: [Python] Implement pandas conversions between Time32, Time64 types and datetime.time

There's also a little bit of code reorganization; sorry about the large diff.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #849 from wesm/ARROW-1183 and squashes the following commits:

7e94e25c [Wes McKinney] Improve error messages to add extra context when using InvalidConversion
9659b903 [Wes McKinney] Always install thrift-cpp in toolchain Parquet MSVC build
41ce47ea [Wes McKinney] Fix MSVC compiler warning
681459c9 [Wes McKinney] Add missing PyDateTime_IMPORT
7024a366 [Wes McKinney] Finish roundtrip of time32/time64 to array of pytime
58fe4c00 [Wes McKinney] Add time to_pandas test
58f99f60 [Wes McKinney] Test from_pandas conversions from pytime
9228ec46 [Wes McKinney] Start in on time conversions


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/50b518af
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/50b518af
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/50b518af

Branch: refs/heads/master
Commit: 50b518afcb03172e6133643eb5cbecd03c63368c
Parents: bf01966
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Sun Jul 16 17:29:43 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Sun Jul 16 17:29:43 2017 -0400

----------------------------------------------------------------------
 ci/msvc-build.bat                           |   16 +-
 cpp/src/arrow/builder.cc                    |   11 +-
 cpp/src/arrow/ipc/metadata.cc               |    4 +-
 cpp/src/arrow/python/builtin_convert.cc     |   21 +-
 cpp/src/arrow/python/builtin_convert.h      |    4 +-
 cpp/src/arrow/python/pandas_convert.cc      |  216 +++--
 cpp/src/arrow/python/util/datetime.h        |   45 +
 cpp/src/arrow/util/logging.h                |    3 +-
 cpp/src/plasma/protocol.cc                  |    4 +-
 python/pyarrow/array.pxi                    | 1119 +---------------------
 python/pyarrow/lib.pyx                      |    6 +
 python/pyarrow/pandas_compat.py             |    2 +
 python/pyarrow/scalar.pxi                   |  376 ++++++++
 python/pyarrow/tests/test_array.py          |    4 +-
 python/pyarrow/tests/test_convert_pandas.py |  106 ++
 python/pyarrow/types.pxi                    |  776 +++++++++++++++
 16 files changed, 1506 insertions(+), 1207 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/ci/msvc-build.bat
----------------------------------------------------------------------
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index be5fd7b..22108ab 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -35,14 +35,26 @@ if "%JOB%" == "Build_Debug" (
 )
 
 conda update --yes --quiet conda
+conda config --set auto_update_conda false
+conda info -a
+
+conda config --set show_channel_urls True
+
+# Help with SSL timeouts to S3
+conda config --set remote_connect_timeout_secs 12
+
+conda config --add channels https://repo.continuum.io/pkgs/free
+conda config --add channels conda-forge
+conda info -a
 
 conda create -n arrow -q -y python=%PYTHON% ^
-      six pytest setuptools numpy pandas cython
+      six pytest setuptools numpy pandas cython ^
+      thrift-cpp
 
 if "%JOB%" == "Toolchain" (
   conda install -n arrow -q -y -c conda-forge ^
       flatbuffers rapidjson cmake git boost-cpp ^
-      thrift-cpp snappy zlib brotli gflags lz4-c zstd
+      snappy zlib brotli gflags lz4-c zstd
 )
 
 call activate arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index e466838..885c650 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -984,16 +984,17 @@ Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::unique_ptr<ArrayBuilder> value_builder,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(
-          pool, type ? type : std::static_pointer_cast<DataType>(
-                                  std::make_shared<ListType>(value_builder->type()))),
+    : ArrayBuilder(pool,
+          type ? type : std::static_pointer_cast<DataType>(
+                            std::make_shared<ListType>(value_builder->type()))),
       offset_builder_(pool),
       value_builder_(std::move(value_builder)) {}
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(pool, type ? type : std::static_pointer_cast<DataType>(
-                                           std::make_shared<ListType>(values->type()))),
+    : ArrayBuilder(pool,
+          type ? type : std::static_pointer_cast<DataType>(
+                            std::make_shared<ListType>(values->type()))),
       offset_builder_(pool),
       values_(values) {}
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index 5b2ca3b..49c24c7 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -933,9 +933,7 @@ const void* Message::header() const {
 bool Message::Equals(const Message& other) const {
   int64_t metadata_bytes = std::min(metadata()->size(), other.metadata()->size());
 
-  if (!metadata()->Equals(*other.metadata(), metadata_bytes)) {
-    return false;
-  }
+  if (!metadata()->Equals(*other.metadata(), metadata_bytes)) { return false; }
 
   // Compare bodies, if they have them
   auto this_body = body();

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 816f95a..83154bb 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -44,7 +44,8 @@ static inline bool IsPyInteger(PyObject* obj) {
 #endif
 }
 
-Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
+Status InvalidConversion(PyObject* obj, const std::string& expected_types,
+    std::ostream* out) {
   OwnedRef type(PyObject_Type(obj));
   RETURN_IF_PYERROR();
   DCHECK_NE(type.obj(), nullptr);
@@ -63,10 +64,10 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
 
   std::string cpp_type_name(bytes, size);
 
-  std::stringstream ss;
-  ss << "Python object of type " << cpp_type_name << " is not None and is not a "
-     << expected_type_name << " object";
-  return Status::Invalid(ss.str());
+  (*out) << "Got Python object of type " << cpp_type_name
+         << " but can only handle these types: "
+         << expected_types;
+  return Status::OK();
 }
 
 class ScalarVisitor {
@@ -462,7 +463,10 @@ class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverte
     } else if (PyBytes_Check(item.obj())) {
       bytes_obj = item.obj();
     } else {
-      return InvalidConversion(item.obj(), "bytes");
+      std::stringstream ss;
+      ss << "Error converting to Binary type: ";
+      RETURN_NOT_OK(InvalidConversion(item.obj(), "bytes", &ss));
+      return Status::Invalid(ss.str());
     }
     // No error checking
     length = PyBytes_GET_SIZE(bytes_obj);
@@ -489,7 +493,10 @@ class FixedWidthBytesConverter
     } else if (PyBytes_Check(item.obj())) {
       bytes_obj = item.obj();
     } else {
-      return InvalidConversion(item.obj(), "bytes");
+      std::stringstream ss;
+      ss << "Error converting to FixedSizeBinary type: ";
+      RETURN_NOT_OK(InvalidConversion(item.obj(), "bytes", &ss));
+      return Status::Invalid(ss.str());
     }
     // No error checking
     RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index dd878b2..4f84fbb 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -24,6 +24,7 @@
 #include "arrow/python/platform.h"
 
 #include <memory>
+#include <ostream>
 #include <string>
 
 #include "arrow/type.h"
@@ -62,7 +63,8 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>
     const std::shared_ptr<DataType>& type, int64_t size);
 
 ARROW_EXPORT
-Status InvalidConversion(PyObject* obj, const std::string& expected_type_name);
+Status InvalidConversion(
+    PyObject* obj, const std::string& expected_type_name, std::ostream* out);
 
 ARROW_EXPORT Status CheckPythonBytesAreFixedLength(
     PyObject* obj, Py_ssize_t expected_length);

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 83cd35a..c520d8d 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -189,7 +189,10 @@ static Status AppendObjectStrings(
       const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
       RETURN_NOT_OK(builder->Append(PyBytes_AS_STRING(obj), length));
     } else {
-      return InvalidConversion(obj, "string or bytes");
+      std::stringstream ss;
+      ss << "Error converting to Python objects to String/UTF8: ";
+      RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
+      return Status::Invalid(ss.str());
     }
   }
 
@@ -230,7 +233,10 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
       RETURN_NOT_OK(
           builder->Append(reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(obj))));
     } else {
-      return InvalidConversion(obj, "string or bytes");
+      std::stringstream ss;
+      ss << "Error converting to Python objects to FixedSizeBinary: ";
+      RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
+      return Status::Invalid(ss.str());
     }
   }
 
@@ -416,6 +422,7 @@ class PandasConverter {
       const std::shared_ptr<DataType>& type, ListBuilder& list_builder, PyObject* list);
   Status ConvertObjects();
   Status ConvertDecimals();
+  Status ConvertTimes();
 
  protected:
   MemoryPool* pool_;
@@ -536,7 +543,6 @@ Status PandasConverter::ConvertDates() {
   /// datetime API otherwise
   PyDateTime_IMPORT;
 
-  Status s;
   PyObject* obj;
   for (int64_t i = 0; i < length_; ++i) {
     obj = objects[i];
@@ -545,7 +551,11 @@ Status PandasConverter::ConvertDates() {
     } else if (PandasObjectIsNull(obj)) {
       RETURN_NOT_OK(date_builder.AppendNull());
     } else {
-      return InvalidConversion(obj, "date");
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(obj, "datetime.date", &ss));
+      return Status::Invalid(ss.str());
     }
   }
   return date_builder.Finish(&out_);
@@ -582,7 +592,6 @@ Status PandasConverter::ConvertDecimals() {
 
   const int bit_width = std::dynamic_pointer_cast<DecimalType>(type_)->bit_width();
   DecimalBuilder decimal_builder(pool_, type_);
-
   RETURN_NOT_OK(decimal_builder.Resize(length_));
 
   for (int64_t i = 0; i < length_; ++i) {
@@ -598,12 +607,45 @@ Status PandasConverter::ConvertDecimals() {
     } else if (PandasObjectIsNull(object)) {
       RETURN_NOT_OK(decimal_builder.AppendNull());
     } else {
-      return InvalidConversion(object, "decimal.Decimal");
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
+      return Status::Invalid(ss.str());
     }
   }
   return decimal_builder.Finish(&out_);
 }
 
+Status PandasConverter::ConvertTimes() {
+  // Convert array of datetime.time objects to Arrow
+  PyAcquireGIL lock;
+  PyDateTime_IMPORT;
+
+  PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+
+  // datetime.time stores microsecond resolution
+  Time64Builder builder(pool_, ::arrow::time64(TimeUnit::MICRO));
+  RETURN_NOT_OK(builder.Resize(length_));
+
+  PyObject* obj;
+  for (int64_t i = 0; i < length_; ++i) {
+    obj = objects[i];
+    if (PyTime_Check(obj)) {
+      RETURN_NOT_OK(builder.Append(PyTime_to_us(obj)));
+    } else if (PandasObjectIsNull(obj)) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(obj, "datetime.time", &ss));
+      return Status::Invalid(ss.str());
+    }
+  }
+  return builder.Finish(&out_);
+}
+
 #undef CONVERT_DECIMAL_CASE
 
 Status PandasConverter::ConvertObjectStrings() {
@@ -631,9 +673,6 @@ Status PandasConverter::ConvertObjectStrings() {
 Status PandasConverter::ConvertObjectFloats() {
   PyAcquireGIL lock;
 
-  DoubleBuilder builder(pool_);
-  RETURN_NOT_OK(builder.Resize(length_));
-
   Ndarray1DIndexer<PyObject*> objects(arr_);
   Ndarray1DIndexer<uint8_t> mask_values;
 
@@ -643,6 +682,9 @@ Status PandasConverter::ConvertObjectFloats() {
     have_mask = true;
   }
 
+  DoubleBuilder builder(pool_);
+  RETURN_NOT_OK(builder.Resize(length_));
+
   PyObject* obj;
   for (int64_t i = 0; i < objects.size(); ++i) {
     obj = objects[i];
@@ -653,7 +695,11 @@ Status PandasConverter::ConvertObjectFloats() {
       RETURN_IF_PYERROR();
       RETURN_NOT_OK(builder.Append(val));
     } else {
-      return InvalidConversion(obj, "float");
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(obj, "float", &ss));
+      return Status::Invalid(ss.str());
     }
   }
 
@@ -685,7 +731,11 @@ Status PandasConverter::ConvertObjectIntegers() {
       RETURN_IF_PYERROR();
       RETURN_NOT_OK(builder.Append(val));
     } else {
-      return InvalidConversion(obj, "integer");
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(obj, "integer", &ss));
+      return Status::Invalid(ss.str());
     }
   }
 
@@ -791,6 +841,36 @@ static Status ConvertDecimals(const ChunkedArray& data, PyObject** out_values) {
   return Status::OK();
 }
 
+template <typename TYPE>
+static Status ConvertTimes(const ChunkedArray& data, PyObject** out_values) {
+  using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+
+  PyAcquireGIL lock;
+  OwnedRef time_ref;
+
+  PyDateTime_IMPORT;
+
+  for (int c = 0; c < data.num_chunks(); c++) {
+    const auto& arr = static_cast<const ArrayType&>(*data.chunk(c));
+    auto type = std::dynamic_pointer_cast<TYPE>(arr.type());
+    DCHECK(type);
+
+    const TimeUnit::type unit = type->unit();
+
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      if (arr.IsNull(i)) {
+        Py_INCREF(Py_None);
+        *out_values++ = Py_None;
+      } else {
+        RETURN_NOT_OK(PyTime_from_int(arr.Value(i), unit, out_values++));
+        RETURN_IF_PYERROR();
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
 Status PandasConverter::ConvertBooleans() {
   PyAcquireGIL lock;
 
@@ -821,7 +901,11 @@ Status PandasConverter::ConvertBooleans() {
     } else if (obj == Py_False) {
       BitUtil::SetBit(null_bitmap_data_, i);
     } else {
-      return InvalidConversion(obj, "bool");
+      std::stringstream ss;
+      ss << "Error converting from Python objects to "
+         << type_->ToString() << ": ";
+      RETURN_NOT_OK(InvalidConversion(obj, "bool", &ss));
+      return Status::Invalid(ss.str());
     }
   }
 
@@ -882,28 +966,35 @@ Status PandasConverter::ConvertObjects() {
     RETURN_NOT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
 
     for (int64_t i = 0; i < length_; ++i) {
-      if (PandasObjectIsNull(objects[i])) {
+      PyObject* obj = objects[i];
+      if (PandasObjectIsNull(obj)) {
         continue;
-      } else if (PyObject_is_string(objects[i])) {
+      } else if (PyObject_is_string(obj)) {
         return ConvertObjectStrings();
-      } else if (PyObject_is_float(objects[i])) {
+      } else if (PyObject_is_float(obj)) {
         return ConvertObjectFloats();
-      } else if (PyBool_Check(objects[i])) {
+      } else if (PyBool_Check(obj)) {
         return ConvertBooleans();
-      } else if (PyObject_is_integer(objects[i])) {
+      } else if (PyObject_is_integer(obj)) {
         return ConvertObjectIntegers();
-      } else if (PyDate_CheckExact(objects[i])) {
+      } else if (PyDate_CheckExact(obj)) {
         // We could choose Date32 or Date64
         return ConvertDates<Date32Type>();
-      } else if (PyObject_IsInstance(const_cast<PyObject*>(objects[i]), Decimal.obj())) {
+      } else if (PyTime_Check(obj)) {
+        return ConvertTimes();
+      } else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj())) {
         return ConvertDecimals();
-      } else if (PyList_Check(objects[i]) || PyArray_Check(objects[i])) {
+      } else if (PyList_Check(obj) || PyArray_Check(obj)) {
         std::shared_ptr<DataType> inferred_type;
-        RETURN_NOT_OK(InferArrowType(objects[i], &inferred_type));
+        RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
         return ConvertLists(inferred_type);
       } else {
-        return InvalidConversion(const_cast<PyObject*>(objects[i]),
-            "string, bool, float, int, date, decimal, list, array");
+        const std::string supported_types =
+          "string, bool, float, int, date, time, decimal, list, array";
+        std::stringstream ss;
+        ss << "Error inferring Arrow type for Python object array. ";
+        RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss));
+        return Status::Invalid(ss.str());
       }
     }
   }
@@ -1187,7 +1278,6 @@ class PandasBlock {
     INT64,
     FLOAT,
     DOUBLE,
-    DECIMAL,
     BOOL,
     DATETIME,
     DATETIME_WITH_TZ,
@@ -1598,6 +1688,10 @@ class ObjectBlock : public PandasBlock {
       RETURN_NOT_OK(ConvertBinaryLike<StringType>(data, out_buffer));
     } else if (type == Type::FIXED_SIZE_BINARY) {
       RETURN_NOT_OK(ConvertFixedSizeBinary(data, out_buffer));
+    } else if (type == Type::TIME32) {
+      RETURN_NOT_OK(ConvertTimes<Time32Type>(data, out_buffer));
+    } else if (type == Type::TIME64) {
+      RETURN_NOT_OK(ConvertTimes<Time64Type>(data, out_buffer));
     } else if (type == Type::DECIMAL) {
       RETURN_NOT_OK(ConvertDecimals(data, out_buffer));
     } else if (type == Type::NA) {
@@ -1950,7 +2044,6 @@ Status MakeBlock(PandasBlock::type type, int64_t num_rows, int num_columns,
     BLOCK_CASE(DOUBLE, Float64Block);
     BLOCK_CASE(BOOL, BoolBlock);
     BLOCK_CASE(DATETIME, DatetimeBlock);
-    BLOCK_CASE(DECIMAL, ObjectBlock);
     default:
       return Status::NotImplemented("Unsupported block type");
   }
@@ -2052,9 +2145,14 @@ class DataFrameBlockCreator {
         case Type::DOUBLE:
           output_type = PandasBlock::DOUBLE;
           break;
+        case Type::NA:
         case Type::STRING:
         case Type::BINARY:
         case Type::FIXED_SIZE_BINARY:
+        case Type::STRUCT:
+        case Type::TIME32:
+        case Type::TIME64:
+        case Type::DECIMAL:
           output_type = PandasBlock::OBJECT;
           break;
         case Type::DATE32:
@@ -2084,13 +2182,6 @@ class DataFrameBlockCreator {
         case Type::DICTIONARY:
           output_type = PandasBlock::CATEGORICAL;
           break;
-        case Type::DECIMAL:
-          output_type = PandasBlock::DECIMAL;
-          break;
-        case Type::NA:
-        case Type::STRUCT:
-          output_type = PandasBlock::OBJECT;
-          break;
         default:
           std::stringstream ss;
           ss << "No known equivalent Pandas block for Arrow data of type ";
@@ -2386,40 +2477,45 @@ class ArrowDeserializer {
     return Status::OK();
   }
 
-  // Boolean specialization
-  Status Visit(const BooleanType& type) {
-    if (data_.null_count() > 0) {
-      RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-      auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-      RETURN_NOT_OK(ConvertBooleanWithNulls(data_, out_values));
-    } else {
-      RETURN_NOT_OK(AllocateOutput(arrow_traits<Type::BOOL>::npy_type));
-      auto out_values = reinterpret_cast<uint8_t*>(PyArray_DATA(arr_));
-      ConvertBooleanNoNulls(data_, out_values);
-    }
-    return Status::OK();
+  template <typename FUNCTOR>
+  inline Status VisitObjects(FUNCTOR func) {
+    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
+    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+    return func(data_, out_values);
   }
 
   // UTF8 strings
   template <typename Type>
   typename std::enable_if<std::is_base_of<BinaryType, Type>::value, Status>::type Visit(
       const Type& type) {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertBinaryLike<Type>(data_, out_values);
+    return VisitObjects(ConvertBinaryLike<Type>);
   }
 
+  Status Visit(const NullType& type) { return VisitObjects(ConvertNulls); }
+
   // Fixed length binary strings
   Status Visit(const FixedSizeBinaryType& type) {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertFixedSizeBinary(data_, out_values);
+    return VisitObjects(ConvertFixedSizeBinary);
   }
 
-  Status Visit(const DecimalType& type) {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertDecimals(data_, out_values);
+  Status Visit(const DecimalType& type) { return VisitObjects(ConvertDecimals); }
+
+  Status Visit(const Time32Type& type) { return VisitObjects(ConvertTimes<Time32Type>); }
+
+  Status Visit(const Time64Type& type) { return VisitObjects(ConvertTimes<Time64Type>); }
+
+  Status Visit(const StructType& type) { return VisitObjects(ConvertStruct); }
+
+  // Boolean specialization
+  Status Visit(const BooleanType& type) {
+    if (data_.null_count() > 0) {
+      return VisitObjects(ConvertBooleanWithNulls);
+    } else {
+      RETURN_NOT_OK(AllocateOutput(arrow_traits<Type::BOOL>::npy_type));
+      auto out_values = reinterpret_cast<uint8_t*>(PyArray_DATA(arr_));
+      ConvertBooleanNoNulls(data_, out_values);
+    }
+    return Status::OK();
   }
 
   Status Visit(const ListType& type) {
@@ -2479,18 +2575,6 @@ class ArrowDeserializer {
     return Status::OK();
   }
 
-  Status Visit(const NullType& type) {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertNulls(data_, out_values);
-  }
-
-  Status Visit(const StructType& type) {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertStruct(data_, out_values);
-  }
-
   Status Visit(const UnionType& type) { return Status::NotImplemented("union type"); }
 
   Status Convert(PyObject** out) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/python/util/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 7ebf46a..d32421e 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -24,6 +24,51 @@
 namespace arrow {
 namespace py {
 
+static inline int64_t PyTime_to_us(PyObject* pytime) {
+  return (static_cast<int64_t>(PyDateTime_TIME_GET_HOUR(pytime)) * 3600000000LL +
+          static_cast<int64_t>(PyDateTime_TIME_GET_MINUTE(pytime)) * 60000000LL +
+          static_cast<int64_t>(PyDateTime_TIME_GET_SECOND(pytime)) * 1000000LL +
+          PyDateTime_TIME_GET_MICROSECOND(pytime));
+}
+
+static inline Status PyTime_from_int(
+    int64_t val, const TimeUnit::type unit, PyObject** out) {
+  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+  switch (unit) {
+    case TimeUnit::NANO:
+      if (val % 1000 != 0) {
+        std::stringstream ss;
+        ss << "Value " << val << " has non-zero nanoseconds";
+        return Status::Invalid(ss.str());
+      }
+      val /= 1000;
+    // fall through
+    case TimeUnit::MICRO:
+      microsecond = val - (val / 1000000LL) * 1000000LL;
+      val /= 1000000LL;
+      second = val - (val / 60) * 60;
+      val /= 60;
+      minute = val - (val / 60) * 60;
+      hour = val / 60;
+      break;
+    case TimeUnit::MILLI:
+      microsecond = (val - (val / 1000) * 1000) * 1000;
+      val /= 1000;
+    // fall through
+    case TimeUnit::SECOND:
+      second = val - (val / 60) * 60;
+      val /= 60;
+      minute = val - (val / 60) * 60;
+      hour = val / 60;
+      break;
+    default:
+      break;
+  }
+  *out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute),
+      static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
+  return Status::OK();
+}
+
 static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   struct tm date = {0};
   date.tm_year = PyDateTime_GET_YEAR(pydate) - 1900;

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index b618121..2fec4fa 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -103,8 +103,7 @@ class NullLog {
 class CerrLog {
  public:
   CerrLog(int severity)  // NOLINT(runtime/explicit)
-      : severity_(severity),
-        has_logged_(false) {}
+      : severity_(severity), has_logged_(false) {}
 
   virtual ~CerrLog() {
     if (has_logged_) { std::cerr << std::endl; }

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/cpp/src/plasma/protocol.cc
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/protocol.cc b/cpp/src/plasma/protocol.cc
index 246aa29..9739d77 100644
--- a/cpp/src/plasma/protocol.cc
+++ b/cpp/src/plasma/protocol.cc
@@ -38,8 +38,8 @@ to_flatbuffer(flatbuffers::FlatBufferBuilder* fbb, const ObjectID* object_ids,
 Status PlasmaReceive(int sock, int64_t message_type, std::vector<uint8_t>* buffer) {
   int64_t type;
   RETURN_NOT_OK(ReadMessage(sock, &type, buffer));
-  ARROW_CHECK(type == message_type) << "type = " << type
-                                    << ", message_type = " << message_type;
+  ARROW_CHECK(type == message_type)
+      << "type = " << type << ", message_type = " << message_type;
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/array.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index ae9ff88..f8bccc7 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -15,1122 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from pyarrow.includes.libarrow cimport *
-
-# These are imprecise because the type (in pandas 0.x) depends on the presence
-# of nulls
-cdef dict _pandas_type_map = {
-    _Type_NA: np.float64,  # NaNs
-    _Type_BOOL: np.bool_,
-    _Type_INT8: np.int8,
-    _Type_INT16: np.int16,
-    _Type_INT32: np.int32,
-    _Type_INT64: np.int64,
-    _Type_UINT8: np.uint8,
-    _Type_UINT16: np.uint16,
-    _Type_UINT32: np.uint32,
-    _Type_UINT64: np.uint64,
-    _Type_HALF_FLOAT: np.float16,
-    _Type_FLOAT: np.float32,
-    _Type_DOUBLE: np.float64,
-    _Type_DATE32: np.dtype('datetime64[ns]'),
-    _Type_DATE64: np.dtype('datetime64[ns]'),
-    _Type_TIMESTAMP: np.dtype('datetime64[ns]'),
-    _Type_BINARY: np.object_,
-    _Type_FIXED_SIZE_BINARY: np.object_,
-    _Type_STRING: np.object_,
-    _Type_LIST: np.object_,
-    _Type_DECIMAL: np.object_,
-}
-
-
-cdef class DataType:
-
-    def __cinit__(self):
-        pass
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        self.sp_type = type
-        self.type = type.get()
-
-    property id:
-
-        def __get__(self):
-            return self.type.id()
-
-    def __str__(self):
-        if self.type is NULL:
-            raise TypeError(
-                '{} is incomplete. The correct way to construct types is '
-                'through public API functions named '
-                'pyarrow.int64, pyarrow.list_, etc.'.format(
-                    type(self).__name__
-                )
-            )
-        return frombytes(self.type.ToString())
-
-    def __repr__(self):
-        return '{0.__class__.__name__}({0})'.format(self)
-
-    def __richcmp__(DataType self, DataType other, int op):
-        if op == cp.Py_EQ:
-            return self.type.Equals(deref(other.type))
-        elif op == cp.Py_NE:
-            return not self.type.Equals(deref(other.type))
-        else:
-            raise TypeError('Invalid comparison')
-
-    def to_pandas_dtype(self):
-        """
-        Return the NumPy dtype that would be used for storing this
-        """
-        cdef Type type_id = self.type.id()
-        if type_id in _pandas_type_map:
-            return _pandas_type_map[type_id]
-        else:
-            raise NotImplementedError(str(self))
-
-
-cdef class DictionaryType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.dict_type = <const CDictionaryType*> type.get()
-
-
-cdef class ListType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.list_type = <const CListType*> type.get()
-
-    property value_type:
-
-        def __get__(self):
-            return pyarrow_wrap_data_type(self.list_type.value_type())
-
-
-cdef class TimestampType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.ts_type = <const CTimestampType*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.ts_type.unit())
-
-    property tz:
-
-        def __get__(self):
-            if self.ts_type.timezone().size() > 0:
-                return frombytes(self.ts_type.timezone())
-            else:
-                return None
-
-
-cdef class Time32Type(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.time_type = <const CTime32Type*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.time_type.unit())
-
-
-cdef class Time64Type(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.time_type = <const CTime64Type*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.time_type.unit())
-
-
-cdef class FixedSizeBinaryType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.fixed_size_binary_type = (
-            <const CFixedSizeBinaryType*> type.get())
-
-    property byte_width:
-
-        def __get__(self):
-            return self.fixed_size_binary_type.byte_width()
-
-
-cdef class DecimalType(FixedSizeBinaryType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.decimal_type = <const CDecimalType*> type.get()
-
-    property precision:
-
-        def __get__(self):
-            return self.decimal_type.precision()
-
-    property scale:
-
-        def __get__(self):
-            return self.decimal_type.scale()
-
-
-cdef class Field:
-    """
-    Represents a named field, with a data type, nullability, and optional
-    metadata
-
-    Notes
-    -----
-    Do not use this class's constructor directly; use pyarrow.field
-    """
-    def __cinit__(self):
-        pass
-
-    cdef void init(self, const shared_ptr[CField]& field):
-        self.sp_field = field
-        self.field = field.get()
-        self.type = pyarrow_wrap_data_type(field.get().type())
-
-    def equals(self, Field other):
-        """
-        Test if this field is equal to the other
-        """
-        return self.field.Equals(deref(other.field))
-
-    def __str__(self):
-        self._check_null()
-        return 'pyarrow.Field<{0}>'.format(frombytes(self.field.ToString()))
-
-    def __repr__(self):
-        return self.__str__()
-
-    property nullable:
-
-        def __get__(self):
-            self._check_null()
-            return self.field.nullable()
-
-    property name:
-
-        def __get__(self):
-            self._check_null()
-            return frombytes(self.field.name())
-
-    property metadata:
-
-        def __get__(self):
-            self._check_null()
-            return box_metadata(self.field.metadata().get())
-
-    def _check_null(self):
-        if self.field == NULL:
-            raise ReferenceError(
-                'Field not initialized (references NULL pointer)')
-
-    def add_metadata(self, dict metadata):
-        """
-        Add metadata as dict of string keys and values to Field
-
-        Parameters
-        ----------
-        metadata : dict
-            Keys and values must be string-like / coercible to bytes
-
-        Returns
-        -------
-        field : pyarrow.Field
-        """
-        cdef shared_ptr[CKeyValueMetadata] c_meta
-        convert_metadata(metadata, &c_meta)
-
-        cdef shared_ptr[CField] new_field
-        with nogil:
-            check_status(self.field.AddMetadata(c_meta, &new_field))
-
-        return pyarrow_wrap_field(new_field)
-
-    def remove_metadata(self):
-        """
-        Create new field without metadata, if any
-
-        Returns
-        -------
-        field : pyarrow.Field
-        """
-        cdef shared_ptr[CField] new_field
-        with nogil:
-            new_field = self.field.RemoveMetadata()
-        return pyarrow_wrap_field(new_field)
-
-
-cdef class Schema:
-
-    def __cinit__(self):
-        pass
-
-    def __len__(self):
-        return self.schema.num_fields()
-
-    def __getitem__(self, int64_t i):
-
-        cdef:
-            Field result = Field()
-            int64_t num_fields = self.schema.num_fields()
-            int64_t index
-
-        if not -num_fields <= i < num_fields:
-            raise IndexError(
-                'Schema field index {:d} is out of range'.format(i)
-            )
-
-        index = i if i >= 0 else num_fields + i
-        assert index >= 0
-
-        result.init(self.schema.field(index))
-        result.type = pyarrow_wrap_data_type(result.field.type())
-
-        return result
-
-    cdef void init(self, const vector[shared_ptr[CField]]& fields):
-        self.schema = new CSchema(fields)
-        self.sp_schema.reset(self.schema)
-
-    cdef void init_schema(self, const shared_ptr[CSchema]& schema):
-        self.schema = schema.get()
-        self.sp_schema = schema
-
-    property names:
-
-        def __get__(self):
-            cdef int i
-            result = []
-            for i in range(self.schema.num_fields()):
-                name = frombytes(self.schema.field(i).get().name())
-                result.append(name)
-            return result
-
-    property metadata:
-
-        def __get__(self):
-            return box_metadata(self.schema.metadata().get())
-
-    def equals(self, other):
-        """
-        Test if this schema is equal to the other
-        """
-        cdef Schema _other
-        _other = other
-
-        return self.sp_schema.get().Equals(deref(_other.schema))
-
-    def field_by_name(self, name):
-        """
-        Access a field by its name rather than the column index.
-
-        Parameters
-        ----------
-        name: str
-
-        Returns
-        -------
-        field: pyarrow.Field
-        """
-        return pyarrow_wrap_field(self.schema.GetFieldByName(tobytes(name)))
-
-    def get_field_index(self, name):
-        return self.schema.GetFieldIndex(tobytes(name))
-
-    def add_metadata(self, dict metadata):
-        """
-        Add metadata as dict of string keys and values to Schema
-
-        Parameters
-        ----------
-        metadata : dict
-            Keys and values must be string-like / coercible to bytes
-
-        Returns
-        -------
-        schema : pyarrow.Schema
-        """
-        cdef shared_ptr[CKeyValueMetadata] c_meta
-        convert_metadata(metadata, &c_meta)
-
-        cdef shared_ptr[CSchema] new_schema
-        with nogil:
-            check_status(self.schema.AddMetadata(c_meta, &new_schema))
-
-        return pyarrow_wrap_schema(new_schema)
-
-    def remove_metadata(self):
-        """
-        Create new schema without metadata, if any
-
-        Returns
-        -------
-        schema : pyarrow.Schema
-        """
-        cdef shared_ptr[CSchema] new_schema
-        with nogil:
-            new_schema = self.schema.RemoveMetadata()
-        return pyarrow_wrap_schema(new_schema)
-
-    def __str__(self):
-        return frombytes(self.schema.ToString())
-
-    def __repr__(self):
-        return self.__str__()
-
-
-cdef dict box_metadata(const CKeyValueMetadata* metadata):
-    cdef unordered_map[c_string, c_string] result
-    if metadata != nullptr:
-        metadata.ToUnorderedMap(&result)
-        return result
-    else:
-        return None
-
-
-cdef dict _type_cache = {}
-
-
-cdef DataType primitive_type(Type type):
-    if type in _type_cache:
-        return _type_cache[type]
-
-    cdef DataType out = DataType()
-    out.init(GetPrimitiveType(type))
-
-    _type_cache[type] = out
-    return out
-
-#------------------------------------------------------------
-# Type factory functions
-
-cdef int convert_metadata(dict metadata,
-                          shared_ptr[CKeyValueMetadata]* out) except -1:
-    cdef:
-        shared_ptr[CKeyValueMetadata] meta = (
-            make_shared[CKeyValueMetadata]())
-        c_string key, value
-
-    for py_key, py_value in metadata.items():
-        key = tobytes(py_key)
-        value = tobytes(py_value)
-        meta.get().Append(key, value)
-    out[0] = meta
-    return 0
-
-
-def field(name, DataType type, bint nullable=True, dict metadata=None):
-    """
-    Create a pyarrow.Field instance
-
-    Parameters
-    ----------
-    name : string or bytes
-    type : pyarrow.DataType
-    nullable : boolean, default True
-    metadata : dict, default None
-        Keys and values must be coercible to bytes
-
-    Returns
-    -------
-    field : pyarrow.Field
-    """
-    cdef:
-        shared_ptr[CKeyValueMetadata] c_meta
-        Field result = Field()
-
-    if metadata is not None:
-        convert_metadata(metadata, &c_meta)
-
-    result.sp_field.reset(new CField(tobytes(name), type.sp_type,
-                                     nullable, c_meta))
-    result.field = result.sp_field.get()
-    result.type = type
-    return result
-
-
-cdef set PRIMITIVE_TYPES = set([
-    _Type_NA, _Type_BOOL,
-    _Type_UINT8, _Type_INT8,
-    _Type_UINT16, _Type_INT16,
-    _Type_UINT32, _Type_INT32,
-    _Type_UINT64, _Type_INT64,
-    _Type_TIMESTAMP, _Type_DATE32,
-    _Type_TIME32, _Type_TIME64,
-    _Type_DATE64,
-    _Type_HALF_FLOAT,
-    _Type_FLOAT,
-    _Type_DOUBLE])
-
-
-def null():
-    return primitive_type(_Type_NA)
-
-
-def bool_():
-    return primitive_type(_Type_BOOL)
-
-
-def uint8():
-    return primitive_type(_Type_UINT8)
-
-
-def int8():
-    return primitive_type(_Type_INT8)
-
-
-def uint16():
-    return primitive_type(_Type_UINT16)
-
-
-def int16():
-    return primitive_type(_Type_INT16)
-
-
-def uint32():
-    return primitive_type(_Type_UINT32)
-
-
-def int32():
-    return primitive_type(_Type_INT32)
-
-
-def uint64():
-    return primitive_type(_Type_UINT64)
-
-
-def int64():
-    return primitive_type(_Type_INT64)
-
-
-cdef dict _timestamp_type_cache = {}
-cdef dict _time_type_cache = {}
-
-
-cdef timeunit_to_string(TimeUnit unit):
-    if unit == TimeUnit_SECOND:
-        return 's'
-    elif unit == TimeUnit_MILLI:
-        return 'ms'
-    elif unit == TimeUnit_MICRO:
-        return 'us'
-    elif unit == TimeUnit_NANO:
-        return 'ns'
-
-
-def timestamp(unit_str, tz=None):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "s":
-        unit = TimeUnit_SECOND
-    elif unit_str == 'ms':
-        unit = TimeUnit_MILLI
-    elif unit_str == 'us':
-        unit = TimeUnit_MICRO
-    elif unit_str == 'ns':
-        unit = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit string')
-
-    cdef TimestampType out = TimestampType()
-
-    if tz is None:
-        out.init(ctimestamp(unit))
-        if unit in _timestamp_type_cache:
-            return _timestamp_type_cache[unit]
-        _timestamp_type_cache[unit] = out
-    else:
-        if not isinstance(tz, six.string_types):
-            tz = tz.zone
-
-        c_timezone = tobytes(tz)
-        out.init(ctimestamp(unit, c_timezone))
-
-    return out
-
-
-def time32(unit_str):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "s":
-        unit = TimeUnit_SECOND
-    elif unit_str == 'ms':
-        unit = TimeUnit_MILLI
-    else:
-        raise ValueError('Invalid TimeUnit for time32: {}'.format(unit_str))
-
-    cdef Time32Type out
-    if unit in _time_type_cache:
-        return _time_type_cache[unit]
-    else:
-        out = Time32Type()
-        out.init(ctime32(unit))
-        _time_type_cache[unit] = out
-        return out
-
-
-def time64(unit_str):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "us":
-        unit = TimeUnit_MICRO
-    elif unit_str == 'ns':
-        unit = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit for time64: {}'.format(unit_str))
-
-    cdef Time64Type out
-    if unit in _time_type_cache:
-        return _time_type_cache[unit]
-    else:
-        out = Time64Type()
-        out.init(ctime64(unit))
-        _time_type_cache[unit] = out
-        return out
-
-
-def date32():
-    return primitive_type(_Type_DATE32)
-
-
-def date64():
-    return primitive_type(_Type_DATE64)
-
-
-def float16():
-    return primitive_type(_Type_HALF_FLOAT)
-
-
-def float32():
-    return primitive_type(_Type_FLOAT)
-
-
-def float64():
-    return primitive_type(_Type_DOUBLE)
-
-
-cpdef DataType decimal(int precision, int scale=0):
-    cdef shared_ptr[CDataType] decimal_type
-    decimal_type.reset(new CDecimalType(precision, scale))
-    return pyarrow_wrap_data_type(decimal_type)
-
-
-def string():
-    """
-    UTF8 string
-    """
-    return primitive_type(_Type_STRING)
-
-
-def binary(int length=-1):
-    """Binary (PyBytes-like) type
-
-    Parameters
-    ----------
-    length : int, optional, default -1
-        If length == -1 then return a variable length binary type. If length is
-        greater than or equal to 0 then return a fixed size binary type of
-        width `length`.
-    """
-    if length == -1:
-        return primitive_type(_Type_BINARY)
-
-    cdef shared_ptr[CDataType] fixed_size_binary_type
-    fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
-    return pyarrow_wrap_data_type(fixed_size_binary_type)
-
-
-cpdef ListType list_(value_type):
-    """
-    Create ListType instance from child data type or field
-
-    Parameters
-    ----------
-    value_type : DataType or Field
-
-    Returns
-    -------
-    list_type : DataType
-    """
-    cdef:
-        DataType data_type
-        Field field
-        shared_ptr[CDataType] list_type
-        ListType out = ListType()
-
-    if isinstance(value_type, DataType):
-        list_type.reset(new CListType((<DataType> value_type).sp_type))
-    elif isinstance(value_type, Field):
-        list_type.reset(new CListType((<Field> value_type).sp_field))
-    else:
-        raise ValueError('List requires DataType or Field')
-
-    out.init(list_type)
-    return out
-
-
-cpdef DictionaryType dictionary(DataType index_type, Array dictionary):
-    """
-    Dictionary (categorical, or simply encoded) type
-
-    Parameters
-    ----------
-    index_type : DataType
-    dictionary : Array
-
-    Returns
-    -------
-    type : DictionaryType
-    """
-    cdef DictionaryType out = DictionaryType()
-    cdef shared_ptr[CDataType] dict_type
-    dict_type.reset(new CDictionaryType(index_type.sp_type,
-                                        dictionary.sp_array))
-    out.init(dict_type)
-    return out
-
-
-def struct(fields):
-    """
-    Create StructType instance from fields
-
-    Parameters
-    ----------
-    fields : sequence of Field values
-
-    Examples
-    --------
-    import pyarrow as pa
-    fields = [
-        pa.field('f1', pa.int32()),
-        pa.field('f2', pa.string())
-    ]
-    struct_type = pa.struct(fields)
-
-    Returns
-    -------
-    type : DataType
-    """
-    cdef:
-        Field field
-        vector[shared_ptr[CField]] c_fields
-        cdef shared_ptr[CDataType] struct_type
-
-    for field in fields:
-        c_fields.push_back(field.sp_field)
-
-    struct_type.reset(new CStructType(c_fields))
-    return pyarrow_wrap_data_type(struct_type)
-
-
-def schema(fields):
-    """
-    Construct pyarrow.Schema from collection of fields
-
-    Parameters
-    ----------
-    field : list or iterable
-
-    Returns
-    -------
-    schema : pyarrow.Schema
-    """
-    cdef:
-        Schema result
-        Field field
-        vector[shared_ptr[CField]] c_fields
-
-    for i, field in enumerate(fields):
-        c_fields.push_back(field.sp_field)
-
-    result = Schema()
-    result.init(c_fields)
-    return result
-
-
-def from_numpy_dtype(object dtype):
-    """
-    Convert NumPy dtype to pyarrow.DataType
-    """
-    cdef shared_ptr[CDataType] c_type
-    with nogil:
-        check_status(NumPyDtypeToArrow(dtype, &c_type))
-
-    return pyarrow_wrap_data_type(c_type)
-
-
-NA = None
-
-
-cdef class NAType(Scalar):
-
-    def __cinit__(self):
-        global NA
-        if NA is not None:
-            raise Exception('Cannot create multiple NAType instances')
-
-        self.type = null()
-
-    def __repr__(self):
-        return 'NA'
-
-    def as_py(self):
-        return None
-
-
-NA = NAType()
-
-
-cdef class ArrayValue(Scalar):
-
-    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-                   int64_t index):
-        self.type = type
-        self.index = index
-        self._set_array(sp_array)
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-
-    def _check_null(self):
-        if self.sp_array.get() == NULL:
-            raise ReferenceError(
-                'ArrayValue instance not propertly initialized '
-                '(references NULL pointer)')
-
-    def __repr__(self):
-        self._check_null()
-        if hasattr(self, 'as_py'):
-            return repr(self.as_py())
-        else:
-            return super(Scalar, self).__repr__()
-
-
-cdef class BooleanValue(ArrayValue):
-
-    def as_py(self):
-        cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Date32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()
-
-        # Shift to seconds since epoch
-        return datetime.datetime.utcfromtimestamp(
-            int(ap.Value(self.index)) * 86400).date()
-
-
-cdef class Date64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CDate64Array* ap = <CDate64Array*> self.sp_array.get()
-        return datetime.datetime.utcfromtimestamp(
-            ap.Value(self.index) / 1000).date()
-
-
-cdef class Time32Value(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CTime32Array* ap = <CTime32Array*> self.sp_array.get()
-            CTime32Type* dtype = <CTime32Type*> ap.type().get()
-
-        if dtype.unit() == TimeUnit_SECOND:
-            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=ap.Value(self.index))).time()
-        else:
-            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=ap.Value(self.index))).time()
-
-
-cdef class Time64Value(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CTime64Array* ap = <CTime64Array*> self.sp_array.get()
-            CTime64Type* dtype = <CTime64Type*> ap.type().get()
-
-        if dtype.unit() == TimeUnit_MICRO:
-            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index))).time()
-        else:
-            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index) / 1000)).time()
-
-
-cdef dict DATETIME_CONVERSION_FUNCTIONS
-
-try:
-    import pandas as pd
-except ImportError:
-    DATETIME_CONVERSION_FUNCTIONS = {
-        TimeUnit_SECOND: lambda x, tzinfo: (
-            datetime.datetime.utcfromtimestamp(x).replace(tzinfo=tzinfo)
-        ),
-        TimeUnit_MILLI: lambda x, tzinfo: (
-            datetime.datetime.utcfromtimestamp(x / 1e3).replace(tzinfo=tzinfo)
-        ),
-        TimeUnit_MICRO: lambda x, tzinfo: (
-            datetime.datetime.utcfromtimestamp(x / 1e6).replace(tzinfo=tzinfo)
-        ),
-    }
-else:
-    DATETIME_CONVERSION_FUNCTIONS = {
-        TimeUnit_SECOND: lambda x, tzinfo: pd.Timestamp(
-            x * 1000000000, tz=tzinfo, unit='ns',
-        ),
-        TimeUnit_MILLI: lambda x, tzinfo: pd.Timestamp(
-            x * 1000000, tz=tzinfo, unit='ns',
-        ),
-        TimeUnit_MICRO: lambda x, tzinfo: pd.Timestamp(
-            x * 1000, tz=tzinfo, unit='ns',
-        ),
-        TimeUnit_NANO: lambda x, tzinfo: pd.Timestamp(
-            x, tz=tzinfo, unit='ns',
-        )
-    }
-
-
-cdef class TimestampValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CTimestampArray* ap = <CTimestampArray*> self.sp_array.get()
-            CTimestampType* dtype = <CTimestampType*> ap.type().get()
-            int64_t value = ap.Value(self.index)
-
-        if not dtype.timezone().empty():
-            import pytz
-            tzinfo = pytz.timezone(frombytes(dtype.timezone()))
-        else:
-            tzinfo = None
-
-        try:
-            converter = DATETIME_CONVERSION_FUNCTIONS[dtype.unit()]
-        except KeyError:
-            raise ValueError(
-                'Cannot convert nanosecond timestamps without pandas'
-            )
-        return converter(value, tzinfo=tzinfo)
-
-
-cdef class FloatValue(ArrayValue):
-
-    def as_py(self):
-        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class DoubleValue(ArrayValue):
-
-    def as_py(self):
-        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class DecimalValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CDecimalArray* ap = <CDecimalArray*> self.sp_array.get()
-            c_string s = ap.FormatValue(self.index)
-        return _pydecimal.Decimal(s.decode('utf8'))
-
-
-cdef class StringValue(ArrayValue):
-
-    def as_py(self):
-        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
-        return ap.GetString(self.index).decode('utf-8')
-
-
-cdef class BinaryValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            const uint8_t* ptr
-            int32_t length
-            CBinaryArray* ap = <CBinaryArray*> self.sp_array.get()
-
-        ptr = ap.GetValue(self.index, &length)
-        return cp.PyBytes_FromStringAndSize(<const char*>(ptr), length)
-
-
-cdef class ListValue(ArrayValue):
-
-    def __len__(self):
-        return self.ap.value_length(self.index)
-
-    def __getitem__(self, i):
-        return self.getitem(i)
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield self.getitem(i)
-        raise StopIteration
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-        self.ap = <CListArray*> sp_array.get()
-        self.value_type = pyarrow_wrap_data_type(self.ap.value_type())
-
-    cdef getitem(self, int64_t i):
-        cdef int64_t j = self.ap.value_offset(self.index) + i
-        return box_scalar(self.value_type, self.ap.values(), j)
-
-    def as_py(self):
-        cdef:
-            int64_t j
-            list result = []
-
-        for j in range(len(self)):
-            result.append(self.getitem(j).as_py())
-
-        return result
-
-
-cdef class FixedSizeBinaryValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CFixedSizeBinaryArray* ap
-            CFixedSizeBinaryType* ap_type
-            int32_t length
-            const char* data
-        ap = <CFixedSizeBinaryArray*> self.sp_array.get()
-        ap_type = <CFixedSizeBinaryType*> ap.type().get()
-        length = ap_type.byte_width()
-        data = <const char*> ap.GetValue(self.index)
-        return cp.PyBytes_FromStringAndSize(data, length)
-
-
-cdef class StructValue(ArrayValue):
-    def as_py(self):
-        cdef:
-            CStructArray* ap
-            vector[shared_ptr[CField]] child_fields = self.type.type.children()
-        ap = <CStructArray*> self.sp_array.get()
-        wrapped_arrays = (pyarrow_wrap_array(ap.field(i))
-                          for i in range(ap.num_fields()))
-        child_names = (child.get().name() for child in child_fields)
-        # Return the struct as a dict
-        return {
-            frombytes(name): child_array[self.index].as_py()
-            for name, child_array in
-            zip(child_names, wrapped_arrays)
-        }
-
-cdef dict _scalar_classes = {
-    _Type_BOOL: BooleanValue,
-    _Type_UINT8: Int8Value,
-    _Type_UINT16: Int16Value,
-    _Type_UINT32: Int32Value,
-    _Type_UINT64: Int64Value,
-    _Type_INT8: Int8Value,
-    _Type_INT16: Int16Value,
-    _Type_INT32: Int32Value,
-    _Type_INT64: Int64Value,
-    _Type_DATE32: Date32Value,
-    _Type_DATE64: Date64Value,
-    _Type_TIME32: Time32Value,
-    _Type_TIME64: Time64Value,
-    _Type_TIMESTAMP: TimestampValue,
-    _Type_FLOAT: FloatValue,
-    _Type_DOUBLE: DoubleValue,
-    _Type_LIST: ListValue,
-    _Type_BINARY: BinaryValue,
-    _Type_STRING: StringValue,
-    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryValue,
-    _Type_DECIMAL: DecimalValue,
-    _Type_STRUCT: StructValue,
-}
-
-cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
-                       int64_t index):
-    cdef ArrayValue val
-    if type.type.id() == _Type_NA:
-        return NA
-    elif sp_array.get().IsNull(index):
-        return NA
-    else:
-        val = _scalar_classes[type.type.id()]()
-        val.init(type, sp_array, index)
-        return val
-
 
 cdef maybe_coerce_datetime64(values, dtype, DataType type,
                              timestamps_to_ms=False):
@@ -1229,7 +113,7 @@ cdef class Array:
         series : pandas.Series or numpy.ndarray
 
         mask : pandas.Series or numpy.ndarray, optional
-            boolean mask if the object is valid or null
+            boolean mask if the object is null (True) or valid (False)
 
         type : pyarrow.DataType
             Explicit type to attempt to coerce to
@@ -1690,6 +574,7 @@ cdef class DictionaryArray(Array):
         result.init(c_result)
         return result
 
+
 cdef class StructArray(Array):
     @staticmethod
     def from_arrays(field_names, arrays):

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/lib.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index cf8e4df..5990308 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -100,6 +100,12 @@ include "error.pxi"
 # Memory pools and allocation
 include "memory.pxi"
 
+# DataType, Field, Schema
+include "types.pxi"
+
+# Array scalar values
+include "scalar.pxi"
+
 # Array types
 include "array.pxi"
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/pandas_compat.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index c909b3e..9b2a5c4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -62,6 +62,8 @@ def get_logical_type_map():
             pa.lib.Type_DOUBLE: 'float64',
             pa.lib.Type_DATE32: 'date',
             pa.lib.Type_DATE64: 'date',
+            pa.lib.Type_TIME32: 'time',
+            pa.lib.Type_TIME64: 'time',
             pa.lib.Type_BINARY: 'bytes',
             pa.lib.Type_FIXED_SIZE_BINARY: 'bytes',
             pa.lib.Type_STRING: 'unicode',

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/scalar.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
new file mode 100644
index 0000000..11ed0ef
--- /dev/null
+++ b/python/pyarrow/scalar.pxi
@@ -0,0 +1,376 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+NA = None
+
+
+cdef class NAType(Scalar):
+
+    def __cinit__(self):
+        global NA
+        if NA is not None:
+            raise Exception('Cannot create multiple NAType instances')
+
+        self.type = null()
+
+    def __repr__(self):
+        return 'NA'
+
+    def as_py(self):
+        return None
+
+
+NA = NAType()
+
+
+cdef class ArrayValue(Scalar):
+
+    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
+                   int64_t index):
+        self.type = type
+        self.index = index
+        self._set_array(sp_array)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+
+    def _check_null(self):
+        if self.sp_array.get() == NULL:
+            raise ReferenceError(
+                'ArrayValue instance not propertly initialized '
+                '(references NULL pointer)')
+
+    def __repr__(self):
+        self._check_null()
+        if hasattr(self, 'as_py'):
+            return repr(self.as_py())
+        else:
+            return super(Scalar, self).__repr__()
+
+
+cdef class BooleanValue(ArrayValue):
+
+    def as_py(self):
+        cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Date32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()
+
+        # Shift to seconds since epoch
+        return datetime.datetime.utcfromtimestamp(
+            int(ap.Value(self.index)) * 86400).date()
+
+
+cdef class Date64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CDate64Array* ap = <CDate64Array*> self.sp_array.get()
+        return datetime.datetime.utcfromtimestamp(
+            ap.Value(self.index) / 1000).date()
+
+
+cdef class Time32Value(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CTime32Array* ap = <CTime32Array*> self.sp_array.get()
+            CTime32Type* dtype = <CTime32Type*> ap.type().get()
+
+        if dtype.unit() == TimeUnit_SECOND:
+            return (datetime.datetime(1970, 1, 1) +
+                    datetime.timedelta(seconds=ap.Value(self.index))).time()
+        else:
+            return (datetime.datetime(1970, 1, 1) +
+                    datetime.timedelta(milliseconds=ap.Value(self.index))).time()
+
+
+cdef class Time64Value(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CTime64Array* ap = <CTime64Array*> self.sp_array.get()
+            CTime64Type* dtype = <CTime64Type*> ap.type().get()
+
+        cdef int64_t val = ap.Value(self.index)
+        print(val)
+        if dtype.unit() == TimeUnit_MICRO:
+            return (datetime.datetime(1970, 1, 1) +
+                    datetime.timedelta(microseconds=val)).time()
+        else:
+            return (datetime.datetime(1970, 1, 1) +
+                    datetime.timedelta(microseconds=val / 1000)).time()
+
+
+cdef dict DATETIME_CONVERSION_FUNCTIONS
+
+try:
+    import pandas as pd
+except ImportError:
+    DATETIME_CONVERSION_FUNCTIONS = {
+        TimeUnit_SECOND: lambda x, tzinfo: (
+            datetime.datetime.utcfromtimestamp(x).replace(tzinfo=tzinfo)
+        ),
+        TimeUnit_MILLI: lambda x, tzinfo: (
+            datetime.datetime.utcfromtimestamp(x / 1e3).replace(tzinfo=tzinfo)
+        ),
+        TimeUnit_MICRO: lambda x, tzinfo: (
+            datetime.datetime.utcfromtimestamp(x / 1e6).replace(tzinfo=tzinfo)
+        ),
+    }
+else:
+    DATETIME_CONVERSION_FUNCTIONS = {
+        TimeUnit_SECOND: lambda x, tzinfo: pd.Timestamp(
+            x * 1000000000, tz=tzinfo, unit='ns',
+        ),
+        TimeUnit_MILLI: lambda x, tzinfo: pd.Timestamp(
+            x * 1000000, tz=tzinfo, unit='ns',
+        ),
+        TimeUnit_MICRO: lambda x, tzinfo: pd.Timestamp(
+            x * 1000, tz=tzinfo, unit='ns',
+        ),
+        TimeUnit_NANO: lambda x, tzinfo: pd.Timestamp(
+            x, tz=tzinfo, unit='ns',
+        )
+    }
+
+
+cdef class TimestampValue(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CTimestampArray* ap = <CTimestampArray*> self.sp_array.get()
+            CTimestampType* dtype = <CTimestampType*> ap.type().get()
+            int64_t value = ap.Value(self.index)
+
+        if not dtype.timezone().empty():
+            import pytz
+            tzinfo = pytz.timezone(frombytes(dtype.timezone()))
+        else:
+            tzinfo = None
+
+        try:
+            converter = DATETIME_CONVERSION_FUNCTIONS[dtype.unit()]
+        except KeyError:
+            raise ValueError(
+                'Cannot convert nanosecond timestamps without pandas'
+            )
+        return converter(value, tzinfo=tzinfo)
+
+
+cdef class FloatValue(ArrayValue):
+
+    def as_py(self):
+        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class DoubleValue(ArrayValue):
+
+    def as_py(self):
+        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class DecimalValue(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CDecimalArray* ap = <CDecimalArray*> self.sp_array.get()
+            c_string s = ap.FormatValue(self.index)
+        return _pydecimal.Decimal(s.decode('utf8'))
+
+
+cdef class StringValue(ArrayValue):
+
+    def as_py(self):
+        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
+        return ap.GetString(self.index).decode('utf-8')
+
+
+cdef class BinaryValue(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            const uint8_t* ptr
+            int32_t length
+            CBinaryArray* ap = <CBinaryArray*> self.sp_array.get()
+
+        ptr = ap.GetValue(self.index, &length)
+        return cp.PyBytes_FromStringAndSize(<const char*>(ptr), length)
+
+
+cdef class ListValue(ArrayValue):
+
+    def __len__(self):
+        return self.ap.value_length(self.index)
+
+    def __getitem__(self, i):
+        return self.getitem(i)
+
+    def __iter__(self):
+        for i in range(len(self)):
+            yield self.getitem(i)
+        raise StopIteration
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        self.ap = <CListArray*> sp_array.get()
+        self.value_type = pyarrow_wrap_data_type(self.ap.value_type())
+
+    cdef getitem(self, int64_t i):
+        cdef int64_t j = self.ap.value_offset(self.index) + i
+        return box_scalar(self.value_type, self.ap.values(), j)
+
+    def as_py(self):
+        cdef:
+            int64_t j
+            list result = []
+
+        for j in range(len(self)):
+            result.append(self.getitem(j).as_py())
+
+        return result
+
+
+cdef class FixedSizeBinaryValue(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CFixedSizeBinaryArray* ap
+            CFixedSizeBinaryType* ap_type
+            int32_t length
+            const char* data
+        ap = <CFixedSizeBinaryArray*> self.sp_array.get()
+        ap_type = <CFixedSizeBinaryType*> ap.type().get()
+        length = ap_type.byte_width()
+        data = <const char*> ap.GetValue(self.index)
+        return cp.PyBytes_FromStringAndSize(data, length)
+
+
+cdef class StructValue(ArrayValue):
+    def as_py(self):
+        cdef:
+            CStructArray* ap
+            vector[shared_ptr[CField]] child_fields = self.type.type.children()
+        ap = <CStructArray*> self.sp_array.get()
+        wrapped_arrays = (pyarrow_wrap_array(ap.field(i))
+                          for i in range(ap.num_fields()))
+        child_names = (child.get().name() for child in child_fields)
+        # Return the struct as a dict
+        return {
+            frombytes(name): child_array[self.index].as_py()
+            for name, child_array in
+            zip(child_names, wrapped_arrays)
+        }
+
+cdef dict _scalar_classes = {
+    _Type_BOOL: BooleanValue,
+    _Type_UINT8: Int8Value,
+    _Type_UINT16: Int16Value,
+    _Type_UINT32: Int32Value,
+    _Type_UINT64: Int64Value,
+    _Type_INT8: Int8Value,
+    _Type_INT16: Int16Value,
+    _Type_INT32: Int32Value,
+    _Type_INT64: Int64Value,
+    _Type_DATE32: Date32Value,
+    _Type_DATE64: Date64Value,
+    _Type_TIME32: Time32Value,
+    _Type_TIME64: Time64Value,
+    _Type_TIMESTAMP: TimestampValue,
+    _Type_FLOAT: FloatValue,
+    _Type_DOUBLE: DoubleValue,
+    _Type_LIST: ListValue,
+    _Type_BINARY: BinaryValue,
+    _Type_STRING: StringValue,
+    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryValue,
+    _Type_DECIMAL: DecimalValue,
+    _Type_STRUCT: StructValue,
+}
+
+cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
+                       int64_t index):
+    cdef ArrayValue val
+    if type.type.id() == _Type_NA:
+        return NA
+    elif sp_array.get().IsNull(index):
+        return NA
+    else:
+        val = _scalar_classes[type.type.id()]()
+        val.init(type, sp_array, index)
+        return val

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index ed81531..413a3be 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -223,8 +223,8 @@ def test_simple_type_construction():
         (pa.decimal(18, 3), 'decimal'),
         (pa.timestamp('ms'), 'datetime'),
         (pa.timestamp('us', 'UTC'), 'datetimetz'),
-        pytest.mark.xfail((pa.time32('s'), None), raises=NotImplementedError),
-        pytest.mark.xfail((pa.time64('us'), None), raises=NotImplementedError),
+        (pa.time32('s'), 'time'),
+        (pa.time64('us'), 'time')
     ]
 )
 def test_logical_type(type, expected):

http://git-wip-us.apache.org/repos/asf/arrow/blob/50b518af/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index b8b85ca..5b84817 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -646,6 +646,95 @@ class TestPandasConversion(unittest.TestCase):
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)
 
+    def test_pytime_from_pandas(self):
+        pytimes = [datetime.time(1, 2, 3, 1356),
+                   datetime.time(4, 5, 6, 1356)]
+
+        # microseconds
+        t1 = pa.time64('us')
+
+        aobjs = np.array(pytimes + [None], dtype=object)
+        parr = pa.Array.from_pandas(aobjs)
+        assert parr.type == t1
+        assert parr[0].as_py() == pytimes[0]
+        assert parr[1].as_py() == pytimes[1]
+        assert parr[2] is pa.NA
+
+        # DataFrame
+        df = pd.DataFrame({'times': aobjs})
+        batch = pa.RecordBatch.from_pandas(df)
+        assert batch[0].equals(parr)
+
+        # Test ndarray of int64 values
+        arr = np.array([_pytime_to_micros(v) for v in pytimes],
+                       dtype='int64')
+
+        a1 = pa.Array.from_pandas(arr, type=pa.time64('us'))
+        assert a1[0].as_py() == pytimes[0]
+
+        a2 = pa.Array.from_pandas(arr * 1000, type=pa.time64('ns'))
+        assert a2[0].as_py() == pytimes[0]
+
+        a3 = pa.Array.from_pandas((arr / 1000).astype('i4'),
+                                  type=pa.time32('ms'))
+        assert a3[0].as_py() == pytimes[0].replace(microsecond=1000)
+
+        a4 = pa.Array.from_pandas((arr / 1000000).astype('i4'),
+                                  type=pa.time32('s'))
+        assert a4[0].as_py() == pytimes[0].replace(microsecond=0)
+
+    def test_arrow_time_to_pandas(self):
+        pytimes = [datetime.time(1, 2, 3, 1356),
+                   datetime.time(4, 5, 6, 1356),
+                   datetime.time(0, 0, 0)]
+
+        expected = np.array(pytimes[:2] + [None])
+        expected_ms = np.array([x.replace(microsecond=1000)
+                                for x in pytimes[:2]] +
+                               [None])
+        expected_s = np.array([x.replace(microsecond=0)
+                               for x in pytimes[:2]] +
+                              [None])
+
+        arr = np.array([_pytime_to_micros(v) for v in pytimes],
+                       dtype='int64')
+        arr = np.array([_pytime_to_micros(v) for v in pytimes],
+                       dtype='int64')
+
+        null_mask = np.array([False, False, True], dtype=bool)
+
+        a1 = pa.Array.from_pandas(arr, mask=null_mask, type=pa.time64('us'))
+        a2 = pa.Array.from_pandas(arr * 1000, mask=null_mask,
+                                  type=pa.time64('ns'))
+
+        a3 = pa.Array.from_pandas((arr / 1000).astype('i4'), mask=null_mask,
+                                  type=pa.time32('ms'))
+        a4 = pa.Array.from_pandas((arr / 1000000).astype('i4'), mask=null_mask,
+                                  type=pa.time32('s'))
+
+        names = ['time64[us]', 'time64[ns]', 'time32[ms]', 'time32[s]']
+        batch = pa.RecordBatch.from_arrays([a1, a2, a3, a4], names)
+        arr = a1.to_pandas()
+        assert (arr == expected).all()
+
+        arr = a2.to_pandas()
+        assert (arr == expected).all()
+
+        arr = a3.to_pandas()
+        assert (arr == expected_ms).all()
+
+        arr = a4.to_pandas()
+        assert (arr == expected_s).all()
+
+        df = batch.to_pandas()
+        expected_df = pd.DataFrame({'time64[us]': expected,
+                                    'time64[ns]': expected,
+                                    'time32[ms]': expected_ms,
+                                    'time32[s]': expected_s},
+                                   columns=names)
+
+        tm.assert_frame_equal(df, expected_df)
+
     def test_all_nones(self):
         def _check_series(s):
             converted = pa.Array.from_pandas(s)
@@ -782,3 +871,20 @@ class TestPandasConversion(unittest.TestCase):
         assert data_column['pandas_type'] == 'decimal'
         assert data_column['numpy_type'] == 'object'
         assert data_column['metadata'] == {'precision': 26, 'scale': 11}
+
+
+def _pytime_from_micros(val):
+    microseconds = val % 1000000
+    val //= 1000000
+    seconds = val % 60
+    val //= 60
+    minutes = val % 60
+    hours = val // 60
+    return datetime.time(hours, minutes, seconds, microseconds)
+
+
+def _pytime_to_micros(pytime):
+    return (pytime.hour * 3600000000 +
+            pytime.minute * 60000000 +
+            pytime.second * 1000000 +
+            pytime.microsecond)


Mime
View raw message