arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-722: [Python] Support additional date/time types and metadata, conversion to/from NumPy and pandas.DataFrame
Date Mon, 10 Apr 2017 00:00:37 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 754bcce68 -> 137aade40


ARROW-722: [Python] Support additional date/time types and metadata, conversion to/from NumPy and pandas.DataFrame

Would appreciate a close look from @xhochy, @cpcloud. Also did some inline visitor cleanup for nicer code reuse.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #510 from wesm/ARROW-722 and squashes the following commits:

3e1fda3 [Wes McKinney] cpplint
cb32a6b [Wes McKinney] Nicer error message. Run clang-format
854f470 [Wes McKinney] First cut refactor
06dce15 [Wes McKinney] Rebase conflicts
d1dc342 [Wes McKinney] Bring Python bindings to date/time types up to spec. Handle zero-copy creation from same-size int32/64. Use inline visitor in PandasConverter


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/137aade4
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/137aade4
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/137aade4

Branch: refs/heads/master
Commit: 137aade404cf53a7dbe0eaa31a868d1c376654b3
Parents: 754bcce
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Sun Apr 9 20:00:30 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Sun Apr 9 20:00:30 2017 -0400

----------------------------------------------------------------------
 cpp/CMakeLists.txt                          |   6 +
 cpp/src/arrow/CMakeLists.txt                |   1 +
 cpp/src/arrow/python/builtin_convert.cc     |  29 +-
 cpp/src/arrow/python/builtin_convert.h      |   4 +
 cpp/src/arrow/python/pandas_convert.cc      | 345 +++++++++++------------
 cpp/src/arrow/python/pandas_convert.h       |   3 -
 cpp/src/arrow/python/type_traits.h          |  63 +++--
 cpp/src/arrow/python/util/datetime.h        |   6 +-
 cpp/src/arrow/table-test.cc                 |  55 +---
 cpp/src/arrow/table.cc                      |  10 +-
 cpp/src/arrow/table.h                       |   4 +-
 cpp/src/arrow/type.cc                       |   4 +-
 cpp/src/arrow/type.h                        |   4 +-
 cpp/src/arrow/util/stl.h                    |   4 +-
 python/pyarrow/scalar.pyx                   |   6 +-
 python/pyarrow/tests/test_convert_pandas.py | 182 +++++++-----
 16 files changed, 399 insertions(+), 327 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5852fe5..b29cb7b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -386,6 +386,12 @@ enable_testing()
 
 set(Boost_DEBUG TRUE)
 set(Boost_USE_MULTITHREADED ON)
+set(Boost_ADDITIONAL_VERSIONS
+  "1.63.0" "1.63"
+  "1.62.0" "1.62"
+  "1.61.0" "1.61"
+  "1.60.0" "1.60")
+
 if (ARROW_BOOST_USE_SHARED)
   # Find shared Boost libraries.
   set(Boost_USE_STATIC_LIBS OFF)

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 8eaa76a..cb5282c 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -34,6 +34,7 @@ install(FILES
   type_traits.h
   test-util.h
   visitor.h
+  visitor_inline.h
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow")
 
 # pkg-config support

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 189ecee..a064a3d 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -43,6 +43,31 @@ static inline bool IsPyInteger(PyObject* obj) {
 #endif
 }
 
+Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
+  OwnedRef type(PyObject_Type(obj));
+  RETURN_IF_PYERROR();
+  DCHECK_NE(type.obj(), nullptr);
+
+  OwnedRef type_name(PyObject_GetAttrString(type.obj(), "__name__"));
+  RETURN_IF_PYERROR();
+  DCHECK_NE(type_name.obj(), nullptr);
+
+  PyObjectStringify bytestring(type_name.obj());
+  RETURN_IF_PYERROR();
+
+  const char* bytes = bytestring.bytes;
+  DCHECK_NE(bytes, nullptr) << "bytes from type(...).__name__ were null";
+
+  Py_ssize_t size = bytestring.size;
+
+  std::string cpp_type_name(bytes, size);
+
+  std::stringstream ss;
+  ss << "Python object of type " << cpp_type_name << " is not None and is not a "
+     << expected_type_name << " object";
+  return Status::Invalid(ss.str());
+}
+
 class ScalarVisitor {
  public:
   ScalarVisitor()
@@ -397,7 +422,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::Invalid("Value that cannot be converted to bytes was encountered");
+        return InvalidConversion(item, "bytes");
       }
       // No error checking
       length = PyBytes_GET_SIZE(bytes_obj);
@@ -431,7 +456,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::Invalid("Value that cannot be converted to bytes was encountered");
+        return InvalidConversion(item, "bytes");
       }
       // No error checking
       RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 3c2e350..2141c25 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -24,6 +24,7 @@
 #include <Python.h>
 
 #include <memory>
+#include <string>
 
 #include "arrow/type.h"
 
@@ -60,6 +61,9 @@ ARROW_EXPORT
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
     const std::shared_ptr<DataType>& type, int64_t size);
 
+ARROW_EXPORT
+Status InvalidConversion(PyObject* obj, const std::string& expected_type_name);
+
 ARROW_EXPORT Status CheckPythonBytesAreFixedLength(
     PyObject* obj, Py_ssize_t expected_length);
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index f6e627e..5bb8e45 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -44,6 +44,7 @@
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
+#include "arrow/visitor_inline.h"
 
 #include "arrow/python/builtin_convert.h"
 #include "arrow/python/common.h"
@@ -271,7 +272,7 @@ static inline bool ListTypeSupported(const Type::type type_id) {
 // ----------------------------------------------------------------------
 // Conversion from NumPy-in-Pandas to Arrow
 
-class PandasConverter : public TypeVisitor {
+class PandasConverter {
  public:
   PandasConverter(
       MemoryPool* pool, PyObject* ao, PyObject* mo, const std::shared_ptr<DataType>& type)
@@ -332,23 +333,37 @@ class PandasConverter : public TypeVisitor {
     return LoadArray(type_, fields, {null_bitmap_, data}, &out_);
   }
 
-#define VISIT_NATIVE(TYPE) \
-  Status Visit(const TYPE& type) override { return VisitNative<TYPE>(); }
+  template <typename T>
+  typename std::enable_if<std::is_base_of<PrimitiveCType, T>::value ||
+                              std::is_same<BooleanType, T>::value,
+      Status>::type
+  Visit(const T& type) {
+    return VisitNative<T>();
+  }
+
+  Status Visit(const Date32Type& type) { return VisitNative<Int32Type>(); }
+  Status Visit(const Date64Type& type) { return VisitNative<Int64Type>(); }
+  Status Visit(const TimestampType& type) { return VisitNative<TimestampType>(); }
+  Status Visit(const Time32Type& type) { return VisitNative<Int32Type>(); }
+  Status Visit(const Time64Type& type) { return VisitNative<Int64Type>(); }
+
+  Status Visit(const NullType& type) { return Status::NotImplemented("null"); }
+
+  Status Visit(const BinaryType& type) { return Status::NotImplemented(type.ToString()); }
+
+  Status Visit(const FixedSizeBinaryType& type) {
+    return Status::NotImplemented(type.ToString());
+  }
 
-  VISIT_NATIVE(BooleanType);
-  VISIT_NATIVE(Int8Type);
-  VISIT_NATIVE(Int16Type);
-  VISIT_NATIVE(Int32Type);
-  VISIT_NATIVE(Int64Type);
-  VISIT_NATIVE(UInt8Type);
-  VISIT_NATIVE(UInt16Type);
-  VISIT_NATIVE(UInt32Type);
-  VISIT_NATIVE(UInt64Type);
-  VISIT_NATIVE(FloatType);
-  VISIT_NATIVE(DoubleType);
-  VISIT_NATIVE(TimestampType);
+  Status Visit(const DecimalType& type) {
+    return Status::NotImplemented(type.ToString());
+  }
 
-#undef VISIT_NATIVE
+  Status Visit(const DictionaryType& type) {
+    return Status::NotImplemented(type.ToString());
+  }
+
+  Status Visit(const NestedType& type) { return Status::NotImplemented(type.ToString()); }
 
   Status Convert() {
     if (PyArray_NDIM(arr_) != 1) {
@@ -358,9 +373,7 @@ class PandasConverter : public TypeVisitor {
     if (type_ == nullptr) { return Status::Invalid("Must pass data type"); }
 
     // Visit the type to perform conversion
-    RETURN_NOT_OK(type_->Accept(this));
-
-    return Status::OK();
+    return VisitTypeInline(*type_, this);
   }
 
   std::shared_ptr<Array> result() const { return out_; }
@@ -371,10 +384,12 @@ class PandasConverter : public TypeVisitor {
   template <int ITEM_TYPE, typename ArrowType>
   Status ConvertTypedLists(const std::shared_ptr<DataType>& type);
 
+  template <typename ArrowType>
+  Status ConvertDates();
+
   Status ConvertObjectStrings();
   Status ConvertObjectFixedWidthBytes(const std::shared_ptr<DataType>& type);
   Status ConvertBooleans();
-  Status ConvertDates();
   Status ConvertLists(const std::shared_ptr<DataType>& type);
   Status ConvertObjects();
   Status ConvertDecimals();
@@ -462,41 +477,36 @@ inline Status PandasConverter::ConvertData<BooleanType>(std::shared_ptr<Buffer>*
   return Status::OK();
 }
 
-Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
-  OwnedRef type(PyObject_Type(obj));
-  RETURN_IF_PYERROR();
-  DCHECK_NE(type.obj(), nullptr);
-
-  OwnedRef type_name(PyObject_GetAttrString(type.obj(), "__name__"));
-  RETURN_IF_PYERROR();
-  DCHECK_NE(type_name.obj(), nullptr);
-
-  PyObjectStringify bytestring(type_name.obj());
-  RETURN_IF_PYERROR();
-
-  const char* bytes = bytestring.bytes;
-  DCHECK_NE(bytes, nullptr) << "bytes from type(...).__name__ were null";
-
-  Py_ssize_t size = bytestring.size;
+template <typename T>
+struct UnboxDate {};
 
-  std::string cpp_type_name(bytes, size);
+template <>
+struct UnboxDate<Date32Type> {
+  static int64_t Unbox(PyObject* obj) {
+    return PyDate_to_days(reinterpret_cast<PyDateTime_Date*>(obj));
+  }
+};
 
-  std::stringstream ss;
-  ss << "Python object of type " << cpp_type_name << " is not None and is not a "
-     << expected_type_name << " object";
-  return Status::Invalid(ss.str());
-}
+template <>
+struct UnboxDate<Date64Type> {
+  static int64_t Unbox(PyObject* obj) {
+    return PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(obj));
+  }
+};
 
+template <typename ArrowType>
 Status PandasConverter::ConvertDates() {
   PyAcquireGIL lock;
 
+  using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
+
   Ndarray1DIndexer<PyObject*> objects(arr_);
 
   if (mask_ != nullptr) {
     return Status::NotImplemented("mask not supported in object conversions yet");
   }
 
-  Date64Builder date_builder(pool_);
+  BuilderType date_builder(pool_);
   RETURN_NOT_OK(date_builder.Resize(length_));
 
   /// We have to run this in this compilation unit, since we cannot use the
@@ -508,8 +518,7 @@ Status PandasConverter::ConvertDates() {
   for (int64_t i = 0; i < length_; ++i) {
     obj = objects[i];
     if (PyDate_CheckExact(obj)) {
-      PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(obj);
-      date_builder.Append(PyDate_to_ms(pydate));
+      date_builder.Append(UnboxDate<ArrowType>::Unbox(obj));
     } else if (PyObject_is_null(obj)) {
       date_builder.AppendNull();
     } else {
@@ -762,8 +771,10 @@ Status PandasConverter::ConvertObjects() {
         return ConvertObjectFixedWidthBytes(type_);
       case Type::BOOL:
         return ConvertBooleans();
+      case Type::DATE32:
+        return ConvertDates<Date32Type>();
       case Type::DATE64:
-        return ConvertDates();
+        return ConvertDates<Date64Type>();
       case Type::LIST: {
         const auto& list_field = static_cast<const ListType&>(*type_);
         return ConvertLists(list_field.value_field()->type);
@@ -787,7 +798,8 @@ Status PandasConverter::ConvertObjects() {
       } else if (PyBool_Check(objects[i])) {
         return ConvertBooleans();
       } else if (PyDate_CheckExact(objects[i])) {
-        return ConvertDates();
+        // We could choose Date32 or Date64
+        return ConvertDates<Date32Type>();
       } else if (PyObject_IsInstance(const_cast<PyObject*>(objects[i]), Decimal.obj())) {
         return ConvertDecimals();
       } else {
@@ -955,34 +967,6 @@ Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo,
 // ----------------------------------------------------------------------
 // pandas 0.x DataFrame conversion internals
 
-inline void set_numpy_metadata(int type, DataType* datatype, PyArrayObject* out) {
-  if (type == NPY_DATETIME) {
-    PyArray_Descr* descr = PyArray_DESCR(out);
-    auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
-    if (datatype->type == Type::TIMESTAMP) {
-      auto timestamp_type = static_cast<TimestampType*>(datatype);
-
-      switch (timestamp_type->unit) {
-        case TimestampType::Unit::SECOND:
-          date_dtype->meta.base = NPY_FR_s;
-          break;
-        case TimestampType::Unit::MILLI:
-          date_dtype->meta.base = NPY_FR_ms;
-          break;
-        case TimestampType::Unit::MICRO:
-          date_dtype->meta.base = NPY_FR_us;
-          break;
-        case TimestampType::Unit::NANO:
-          date_dtype->meta.base = NPY_FR_ns;
-          break;
-      }
-    } else {
-      // datatype->type == Type::DATE64
-      date_dtype->meta.base = NPY_FR_D;
-    }
-  }
-}
-
 class PandasBlock {
  public:
   enum type {
@@ -1148,8 +1132,9 @@ static void ConvertBooleanNoNulls(const ChunkedArray& data, uint8_t* out_values)
   }
 }
 
-template <typename ArrayType>
+template <typename Type>
 inline Status ConvertBinaryLike(const ChunkedArray& data, PyObject** out_values) {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
   PyAcquireGIL lock;
   for (int c = 0; c < data.num_chunks(); c++) {
     auto arr = static_cast<ArrayType*>(data.chunk(c).get());
@@ -1287,21 +1272,7 @@ inline void ConvertNumericNullableCast(
   }
 }
 
-template <typename T>
-inline void ConvertDates(const ChunkedArray& data, T na_value, T* out_values) {
-  for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const T*>(prim_arr->data()->data());
-
-    for (int64_t i = 0; i < arr->length(); ++i) {
-      // There are 1000 * 60 * 60 * 24 = 86400000ms in a day
-      *out_values++ = arr->IsNull(i) ? na_value : in_values[i] / 86400000;
-    }
-  }
-}
-
-template <typename InType, int SHIFT>
+template <typename InType, int64_t SHIFT>
 inline void ConvertDatetimeNanos(const ChunkedArray& data, int64_t* out_values) {
   for (int c = 0; c < data.num_chunks(); c++) {
     const std::shared_ptr<Array> arr = data.chunk(c);
@@ -1339,9 +1310,9 @@ class ObjectBlock : public PandasBlock {
     if (type == Type::BOOL) {
       RETURN_NOT_OK(ConvertBooleanWithNulls(data, out_buffer));
     } else if (type == Type::BINARY) {
-      RETURN_NOT_OK(ConvertBinaryLike<BinaryArray>(data, out_buffer));
+      RETURN_NOT_OK(ConvertBinaryLike<BinaryType>(data, out_buffer));
     } else if (type == Type::STRING) {
-      RETURN_NOT_OK(ConvertBinaryLike<StringArray>(data, out_buffer));
+      RETURN_NOT_OK(ConvertBinaryLike<StringType>(data, out_buffer));
     } else if (type == Type::FIXED_SIZE_BINARY) {
       RETURN_NOT_OK(ConvertFixedSizeBinary(data, out_buffer));
     } else if (type == Type::DECIMAL) {
@@ -1532,7 +1503,11 @@ class DatetimeBlock : public PandasBlock {
 
     const ChunkedArray& data = *col.get()->data();
 
-    if (type == Type::DATE64) {
+    if (type == Type::DATE32) {
+      // Date32Type is a day count stored as int32_t (scaled via kNanosecondsInDay)
+      // TODO(wesm): Do we want to make sure to zero out the milliseconds?
+      ConvertDatetimeNanos<int32_t, kNanosecondsInDay>(data, out_buffer);
+    } else if (type == Type::DATE64) {
       // Date64Type is millisecond timestamp stored as int64_t
       // TODO(wesm): Do we want to make sure to zero out the milliseconds?
       ConvertDatetimeNanos<int64_t, 1000000L>(data, out_buffer);
@@ -1779,6 +1754,9 @@ class DataFrameBlockCreator {
         case Type::FIXED_SIZE_BINARY:
           output_type = PandasBlock::OBJECT;
           break;
+        case Type::DATE32:
+          output_type = PandasBlock::DATETIME;
+          break;
         case Type::DATE64:
           output_type = PandasBlock::DATETIME;
           break;
@@ -1960,6 +1938,34 @@ class DataFrameBlockCreator {
   BlockMap datetimetz_blocks_;
 };
 
+inline void set_numpy_metadata(int type, DataType* datatype, PyArrayObject* out) {
+  if (type == NPY_DATETIME) {
+    PyArray_Descr* descr = PyArray_DESCR(out);
+    auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+    if (datatype->type == Type::TIMESTAMP) {
+      auto timestamp_type = static_cast<TimestampType*>(datatype);
+
+      switch (timestamp_type->unit) {
+        case TimestampType::Unit::SECOND:
+          date_dtype->meta.base = NPY_FR_s;
+          break;
+        case TimestampType::Unit::MILLI:
+          date_dtype->meta.base = NPY_FR_ms;
+          break;
+        case TimestampType::Unit::MICRO:
+          date_dtype->meta.base = NPY_FR_us;
+          break;
+        case TimestampType::Unit::NANO:
+          date_dtype->meta.base = NPY_FR_ns;
+          break;
+      }
+    } else {
+      // datatype->type == Type::DATE64
+      date_dtype->meta.base = NPY_FR_D;
+    }
+  }
+}
+
 class ArrowDeserializer {
  public:
   ArrowDeserializer(const std::shared_ptr<Column>& col, PyObject* py_ref)
@@ -2024,51 +2030,14 @@ class ArrowDeserializer {
   // Allocate new array and deserialize. Can do a zero copy conversion for some
   // types
 
-  Status Convert(PyObject** out) {
-#define CONVERT_CASE(TYPE)                      \
-  case Type::TYPE: {                            \
-    RETURN_NOT_OK(ConvertValues<Type::TYPE>()); \
-  } break;
-
-    switch (col_->type()->type) {
-      CONVERT_CASE(BOOL);
-      CONVERT_CASE(INT8);
-      CONVERT_CASE(INT16);
-      CONVERT_CASE(INT32);
-      CONVERT_CASE(INT64);
-      CONVERT_CASE(UINT8);
-      CONVERT_CASE(UINT16);
-      CONVERT_CASE(UINT32);
-      CONVERT_CASE(UINT64);
-      CONVERT_CASE(FLOAT);
-      CONVERT_CASE(DOUBLE);
-      CONVERT_CASE(BINARY);
-      CONVERT_CASE(STRING);
-      CONVERT_CASE(FIXED_SIZE_BINARY);
-      CONVERT_CASE(DATE64);
-      CONVERT_CASE(TIMESTAMP);
-      CONVERT_CASE(DICTIONARY);
-      CONVERT_CASE(LIST);
-      CONVERT_CASE(DECIMAL);
-      default: {
-        std::stringstream ss;
-        ss << "Arrow type reading not implemented for " << col_->type()->ToString();
-        return Status::NotImplemented(ss.str());
-      }
-    }
-
-#undef CONVERT_CASE
-
-    *out = result_;
-    return Status::OK();
-  }
+  template <typename Type>
+  typename std::enable_if<std::is_base_of<FloatingPoint, Type>::value, Status>::type
+  Visit(const Type& type) {
+    constexpr int TYPE = Type::type_id;
+    using traits = arrow_traits<TYPE>;
 
-  template <int TYPE>
-  inline typename std::enable_if<
-      (TYPE != Type::DATE64) & arrow_traits<TYPE>::is_numeric_nullable, Status>::type
-  ConvertValues() {
-    typedef typename arrow_traits<TYPE>::T T;
-    int npy_type = arrow_traits<TYPE>::npy_type;
+    typedef typename traits::T T;
+    int npy_type = traits::npy_type;
 
     if (data_.num_chunks() == 1 && data_.null_count() == 0 && py_ref_ != nullptr) {
       return ConvertValuesZeroCopy<TYPE>(npy_type, data_.chunk(0));
@@ -2076,31 +2045,56 @@ class ArrowDeserializer {
 
     RETURN_NOT_OK(AllocateOutput(npy_type));
     auto out_values = reinterpret_cast<T*>(PyArray_DATA(arr_));
-    ConvertNumericNullable<T>(data_, arrow_traits<TYPE>::na_value, out_values);
+    ConvertNumericNullable<T>(data_, traits::na_value, out_values);
 
     return Status::OK();
   }
 
-  template <int TYPE>
-  inline typename std::enable_if<TYPE == Type::DATE64, Status>::type ConvertValues() {
-    typedef typename arrow_traits<TYPE>::T T;
+  template <typename Type>
+  typename std::enable_if<std::is_base_of<DateType, Type>::value ||
+                              std::is_base_of<TimestampType, Type>::value,
+      Status>::type
+  Visit(const Type& type) {
+    constexpr int TYPE = Type::type_id;
+    using traits = arrow_traits<TYPE>;
+
+    typedef typename traits::T T;
 
-    RETURN_NOT_OK(AllocateOutput(arrow_traits<TYPE>::npy_type));
+    RETURN_NOT_OK(AllocateOutput(traits::npy_type));
     auto out_values = reinterpret_cast<T*>(PyArray_DATA(arr_));
-    ConvertDates<T>(data_, arrow_traits<TYPE>::na_value, out_values);
+
+    constexpr T na_value = traits::na_value;
+    constexpr int64_t kShift = traits::npy_shift;
+
+    for (int c = 0; c < data_.num_chunks(); c++) {
+      const std::shared_ptr<Array> arr = data_.chunk(c);
+      auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
+      auto in_values = reinterpret_cast<const T*>(prim_arr->data()->data());
+
+      for (int64_t i = 0; i < arr->length(); ++i) {
+        *out_values++ = arr->IsNull(i) ? na_value : in_values[i] / kShift;
+      }
+    }
     return Status::OK();
   }
 
+  template <typename Type>
+  typename std::enable_if<std::is_base_of<TimeType, Type>::value, Status>::type Visit(
+      const Type& type) {
+    return Status::NotImplemented("Don't know how to serialize Arrow time type to NumPy");
+  }
+
   // Integer specialization
-  template <int TYPE>
-  inline
-      typename std::enable_if<arrow_traits<TYPE>::is_numeric_not_nullable, Status>::type
-      ConvertValues() {
-    typedef typename arrow_traits<TYPE>::T T;
-    int npy_type = arrow_traits<TYPE>::npy_type;
+  template <typename Type>
+  typename std::enable_if<std::is_base_of<Integer, Type>::value, Status>::type Visit(
+      const Type& type) {
+    constexpr int TYPE = Type::type_id;
+    using traits = arrow_traits<TYPE>;
+
+    typedef typename traits::T T;
 
     if (data_.num_chunks() == 1 && data_.null_count() == 0 && py_ref_ != nullptr) {
-      return ConvertValuesZeroCopy<TYPE>(npy_type, data_.chunk(0));
+      return ConvertValuesZeroCopy<TYPE>(traits::npy_type, data_.chunk(0));
     }
 
     if (data_.null_count() > 0) {
@@ -2108,7 +2102,7 @@ class ArrowDeserializer {
       auto out_values = reinterpret_cast<double*>(PyArray_DATA(arr_));
       ConvertIntegerWithNulls<T>(data_, out_values);
     } else {
-      RETURN_NOT_OK(AllocateOutput(arrow_traits<TYPE>::npy_type));
+      RETURN_NOT_OK(AllocateOutput(traits::npy_type));
       auto out_values = reinterpret_cast<T*>(PyArray_DATA(arr_));
       ConvertIntegerNoNullsSameType<T>(data_, out_values);
     }
@@ -2117,15 +2111,13 @@ class ArrowDeserializer {
   }
 
   // Boolean specialization
-  template <int TYPE>
-  inline typename std::enable_if<arrow_traits<TYPE>::is_boolean, Status>::type
-  ConvertValues() {
+  Status Visit(const BooleanType& type) {
     if (data_.null_count() > 0) {
       RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
       auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
       RETURN_NOT_OK(ConvertBooleanWithNulls(data_, out_values));
     } else {
-      RETURN_NOT_OK(AllocateOutput(arrow_traits<TYPE>::npy_type));
+      RETURN_NOT_OK(AllocateOutput(arrow_traits<Type::BOOL>::npy_type));
       auto out_values = reinterpret_cast<uint8_t*>(PyArray_DATA(arr_));
       ConvertBooleanNoNulls(data_, out_values);
     }
@@ -2133,43 +2125,32 @@ class ArrowDeserializer {
   }
 
   // UTF8 strings
-  template <int TYPE>
-  inline typename std::enable_if<TYPE == Type::STRING, Status>::type ConvertValues() {
+  template <typename Type>
+  typename std::enable_if<std::is_base_of<BinaryType, Type>::value, Status>::type Visit(
+      const Type& type) {
     RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
     auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertBinaryLike<StringArray>(data_, out_values);
-  }
-
-  // Binary strings
-  template <int T2>
-  inline typename std::enable_if<T2 == Type::BINARY, Status>::type ConvertValues() {
-    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
-    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-    return ConvertBinaryLike<BinaryArray>(data_, out_values);
+    return ConvertBinaryLike<Type>(data_, out_values);
   }
 
   // Fixed length binary strings
-  template <int TYPE>
-  inline typename std::enable_if<TYPE == Type::FIXED_SIZE_BINARY, Status>::type
-  ConvertValues() {
+  Status Visit(const FixedSizeBinaryType& type) {
     RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
     auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
     return ConvertFixedSizeBinary(data_, out_values);
   }
 
-  template <int TYPE>
-  inline typename std::enable_if<TYPE == Type::DECIMAL, Status>::type ConvertValues() {
+  Status Visit(const DecimalType& type) {
     RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
     auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
     return ConvertDecimals(data_, out_values);
   }
 
+  Status Visit(const ListType& type) {
 #define CONVERTVALUES_LISTSLIKE_CASE(ArrowType, ArrowEnum) \
   case Type::ArrowEnum:                                    \
     return ConvertListsLike<ArrowType>(col_, out_values);
 
-  template <int T2>
-  inline typename std::enable_if<T2 == Type::LIST, Status>::type ConvertValues() {
     RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
     auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
     auto list_type = std::static_pointer_cast<ListType>(col_->type());
@@ -2193,10 +2174,10 @@ class ArrowDeserializer {
         return Status::NotImplemented(ss.str());
       }
     }
+#undef CONVERTVALUES_LISTSLIKE_CASE
   }
 
-  template <int TYPE>
-  inline typename std::enable_if<TYPE == Type::DICTIONARY, Status>::type ConvertValues() {
+  Status Visit(const DictionaryType& type) {
     std::shared_ptr<PandasBlock> block;
     RETURN_NOT_OK(MakeCategoricalBlock(col_->type(), col_->length(), &block));
     RETURN_NOT_OK(block->Write(col_, 0, 0));
@@ -2216,6 +2197,18 @@ class ArrowDeserializer {
     return Status::OK();
   }
 
+  Status Visit(const NullType& type) { return Status::NotImplemented("null type"); }
+
+  Status Visit(const StructType& type) { return Status::NotImplemented("struct type"); }
+
+  Status Visit(const UnionType& type) { return Status::NotImplemented("union type"); }
+
+  Status Convert(PyObject** out) {
+    RETURN_NOT_OK(VisitTypeInline(*col_->type(), this));
+    *out = result_;
+    return Status::OK();
+  }
+
  private:
   std::shared_ptr<Column> col_;
   const ChunkedArray& data_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/pandas_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.h b/cpp/src/arrow/python/pandas_convert.h
index 8fd3107..4d32c8b 100644
--- a/cpp/src/arrow/python/pandas_convert.h
+++ b/cpp/src/arrow/python/pandas_convert.h
@@ -71,9 +71,6 @@ ARROW_EXPORT
 Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo,
     const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out);
 
-ARROW_EXPORT
-Status InvalidConversion(PyObject* obj, const std::string& expected_type_name);
-
 }  // namespace py
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/type_traits.h b/cpp/src/arrow/python/type_traits.h
index f78dc36..c464d65 100644
--- a/cpp/src/arrow/python/type_traits.h
+++ b/cpp/src/arrow/python/type_traits.h
@@ -119,9 +119,6 @@ template <>
 struct arrow_traits<Type::BOOL> {
   static constexpr int npy_type = NPY_BOOL;
   static constexpr bool supports_nulls = false;
-  static constexpr bool is_boolean = true;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = false;
 };
 
 #define INT_DECL(TYPE)                                     \
@@ -130,9 +127,6 @@ struct arrow_traits<Type::BOOL> {
     static constexpr int npy_type = NPY_##TYPE;            \
     static constexpr bool supports_nulls = false;          \
     static constexpr double na_value = NAN;                \
-    static constexpr bool is_boolean = false;              \
-    static constexpr bool is_numeric_not_nullable = true;  \
-    static constexpr bool is_numeric_nullable = false;     \
     typedef typename npy_traits<NPY_##TYPE>::value_type T; \
   };
 
@@ -150,9 +144,6 @@ struct arrow_traits<Type::FLOAT> {
   static constexpr int npy_type = NPY_FLOAT32;
   static constexpr bool supports_nulls = true;
   static constexpr float na_value = NAN;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT32>::value_type T;
 };
 
@@ -161,33 +152,63 @@ struct arrow_traits<Type::DOUBLE> {
   static constexpr int npy_type = NPY_FLOAT64;
   static constexpr bool supports_nulls = true;
   static constexpr double na_value = NAN;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT64>::value_type T;
 };
 
 static constexpr int64_t kPandasTimestampNull = std::numeric_limits<int64_t>::min();
 
+constexpr int64_t kNanosecondsInDay = 86400000000000LL;
+
 template <>
 struct arrow_traits<Type::TIMESTAMP> {
   static constexpr int npy_type = NPY_DATETIME;
+  static constexpr int64_t npy_shift = 1;
+
   static constexpr bool supports_nulls = true;
   static constexpr int64_t na_value = kPandasTimestampNull;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = true;
   typedef typename npy_traits<NPY_DATETIME>::value_type T;
 };
 
 template <>
+struct arrow_traits<Type::DATE32> {
+  // Data stores as FR_D day unit
+  static constexpr int npy_type = NPY_DATETIME;
+  static constexpr int64_t npy_shift = 1;
+
+  static constexpr bool supports_nulls = true;
+  typedef typename npy_traits<NPY_DATETIME>::value_type T;
+
+  static constexpr int64_t na_value = kPandasTimestampNull;
+  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
+};
+
+template <>
 struct arrow_traits<Type::DATE64> {
+  // Data stores as FR_D day unit
   static constexpr int npy_type = NPY_DATETIME;
+
+  // There are 1000 * 60 * 60 * 24 = 86400000ms in a day
+  static constexpr int64_t npy_shift = 86400000;
+
+  static constexpr bool supports_nulls = true;
+  typedef typename npy_traits<NPY_DATETIME>::value_type T;
+
+  static constexpr int64_t na_value = kPandasTimestampNull;
+  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
+};
+
+template <>
+struct arrow_traits<Type::TIME32> {
+  static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
   static constexpr int64_t na_value = kPandasTimestampNull;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = true;
+  typedef typename npy_traits<NPY_DATETIME>::value_type T;
+};
+
+template <>
+struct arrow_traits<Type::TIME64> {
+  static constexpr int npy_type = NPY_OBJECT;
+  static constexpr bool supports_nulls = true;
   typedef typename npy_traits<NPY_DATETIME>::value_type T;
 };
 
@@ -195,18 +216,12 @@ template <>
 struct arrow_traits<Type::STRING> {
   static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = false;
 };
 
 template <>
 struct arrow_traits<Type::BINARY> {
   static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
-  static constexpr bool is_boolean = false;
-  static constexpr bool is_numeric_not_nullable = false;
-  static constexpr bool is_numeric_nullable = false;
 };
 
 }  // namespace py

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/python/util/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index f704a96..82cf6fc 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -24,7 +24,7 @@
 namespace arrow {
 namespace py {
 
-inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
+static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   struct tm date = {0};
   date.tm_year = PyDateTime_GET_YEAR(pydate) - 1900;
   date.tm_mon = PyDateTime_GET_MONTH(pydate) - 1;
@@ -36,6 +36,10 @@ inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   return lrint(difftime(mktime(&date), mktime(&epoch)) * 1000);
 }
 
+static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {
+  return static_cast<int32_t>(PyDate_to_ms(pydate) / 86400000LL);
+}
+
 }  // namespace py
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index 156c3d1..cdc0238 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -398,62 +398,35 @@ TEST_F(TestTable, AddColumn) {
   ASSERT_TRUE(status.IsInvalid());
 
   // Add column with wrong length
-  auto longer_col = std::make_shared<Column>(
-      schema_->field(0), MakePrimitive<Int32Array>(length + 1));
+  auto longer_col =
+      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length + 1));
   status = table.AddColumn(0, longer_col, &result);
   ASSERT_TRUE(status.IsInvalid());
 
   // Add column 0 in different places
   ASSERT_OK(table.AddColumn(0, columns_[0], &result));
-  auto ex_schema = std::shared_ptr<Schema>(new Schema({
-      schema_->field(0),
-      schema_->field(0),
-      schema_->field(1),
-      schema_->field(2)}));
+  auto ex_schema = std::shared_ptr<Schema>(new Schema(
+      {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}));
   std::vector<std::shared_ptr<Column>> ex_columns = {
-      table.column(0),
-      table.column(0),
-      table.column(1),
-      table.column(2)};
+      table.column(0), table.column(0), table.column(1), table.column(2)};
   ASSERT_TRUE(result->Equals(Table(ex_schema, ex_columns)));
 
   ASSERT_OK(table.AddColumn(1, columns_[0], &result));
-  ex_schema = std::shared_ptr<Schema>(new Schema({
-      schema_->field(0),
-      schema_->field(0),
-      schema_->field(1),
-      schema_->field(2)}));
-  ex_columns = {
-      table.column(0),
-      table.column(0),
-      table.column(1),
-      table.column(2)};
+  ex_schema = std::shared_ptr<Schema>(new Schema(
+      {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}));
+  ex_columns = {table.column(0), table.column(0), table.column(1), table.column(2)};
   ASSERT_TRUE(result->Equals(Table(ex_schema, ex_columns)));
 
   ASSERT_OK(table.AddColumn(2, columns_[0], &result));
-  ex_schema = std::shared_ptr<Schema>(new Schema({
-      schema_->field(0),
-      schema_->field(1),
-      schema_->field(0),
-      schema_->field(2)}));
-  ex_columns = {
-      table.column(0),
-      table.column(1),
-      table.column(0),
-      table.column(2)};
+  ex_schema = std::shared_ptr<Schema>(new Schema(
+      {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)}));
+  ex_columns = {table.column(0), table.column(1), table.column(0), table.column(2)};
   ASSERT_TRUE(result->Equals(Table(ex_schema, ex_columns)));
 
   ASSERT_OK(table.AddColumn(3, columns_[0], &result));
-  ex_schema = std::shared_ptr<Schema>(new Schema({
-      schema_->field(0),
-      schema_->field(1),
-      schema_->field(2),
-      schema_->field(0)}));
-  ex_columns = {
-      table.column(0),
-      table.column(1),
-      table.column(2),
-      table.column(0)};
+  ex_schema = std::shared_ptr<Schema>(new Schema(
+      {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)}));
+  ex_columns = {table.column(0), table.column(1), table.column(2), table.column(0)};
   ASSERT_TRUE(result->Equals(Table(ex_schema, ex_columns)));
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 9b39f77..4c5257b 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -321,11 +321,9 @@ Status Table::RemoveColumn(int i, std::shared_ptr<Table>* out) const {
   return Status::OK();
 }
 
-Status Table::AddColumn(int i, const std::shared_ptr<Column>& col,
-    std::shared_ptr<Table>* out) const {
-  if (i < 0 || i > num_columns() + 1) {
-    return Status::Invalid("Invalid column index.");
-  }
+Status Table::AddColumn(
+    int i, const std::shared_ptr<Column>& col, std::shared_ptr<Table>* out) const {
+  if (i < 0 || i > num_columns() + 1) { return Status::Invalid("Invalid column index."); }
   if (col == nullptr) {
     std::stringstream ss;
     ss << "Column " << i << " was null";
@@ -334,7 +332,7 @@ Status Table::AddColumn(int i, const std::shared_ptr<Column>& col,
   if (col->length() != num_rows_) {
     std::stringstream ss;
     ss << "Added column's length must match table's length. Expected length " << num_rows_
-        << " but got length " << col->length();
+       << " but got length " << col->length();
     return Status::Invalid(ss.str());
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index dcea53d..b15d31b 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -182,8 +182,8 @@ class ARROW_EXPORT Table {
   Status RemoveColumn(int i, std::shared_ptr<Table>* out) const;
 
   /// Add column to the table, producing a new Table
-  Status AddColumn(int i, const std::shared_ptr<Column>& column,
-      std::shared_ptr<Table>* out) const;
+  Status AddColumn(
+      int i, const std::shared_ptr<Column>& column, std::shared_ptr<Table>* out) const;
 
   // @returns: the number of columns in the table
   int num_columns() const { return static_cast<int>(columns_.size()); }

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index df4590f..93cab14 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -258,8 +258,8 @@ std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) {
   }
 }
 
-Status Schema::AddField(int i, const std::shared_ptr<Field>& field,
-    std::shared_ptr<Schema>* out) const {
+Status Schema::AddField(
+    int i, const std::shared_ptr<Field>& field, std::shared_ptr<Schema>* out) const {
   DCHECK_GE(i, 0);
   DCHECK_LE(i, this->num_fields());
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 3a35f56..730cbed 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -633,8 +633,8 @@ class ARROW_EXPORT Schema {
   // Render a string representation of the schema suitable for debugging
   std::string ToString() const;
 
-  Status AddField(int i, const std::shared_ptr<Field>& field,
-      std::shared_ptr<Schema>* out) const;
+  Status AddField(
+      int i, const std::shared_ptr<Field>& field, std::shared_ptr<Schema>* out) const;
   Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
 
   int num_fields() const { return static_cast<int>(fields_.size()); }

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/cpp/src/arrow/util/stl.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h
index bd25053..bfce111 100644
--- a/cpp/src/arrow/util/stl.h
+++ b/cpp/src/arrow/util/stl.h
@@ -40,8 +40,8 @@ inline std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t i
 }
 
 template <typename T>
-inline std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
-    const T& new_element) {
+inline std::vector<T> AddVectorElement(
+    const std::vector<T>& values, size_t index, const T& new_element) {
   DCHECK_LE(index, values.size());
   std::vector<T> out;
   out.reserve(values.size() + 1);

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index f3d9321..196deed 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -134,7 +134,11 @@ cdef class UInt64Value(ArrayValue):
 cdef class Date32Value(ArrayValue):
 
     def as_py(self):
-        raise NotImplementedError
+        cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()
+
+        # Shift to seconds since epoch
+        return datetime.datetime.utcfromtimestamp(
+            int(ap.Value(self.index)) * 86400).date()
 
 
 cdef class Date64Value(ArrayValue):

http://git-wip-us.apache.org/repos/asf/arrow/blob/137aade4/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 0504e1d..d1bea0b 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -28,7 +28,7 @@ import pandas as pd
 import pandas.util.testing as tm
 
 from pyarrow.compat import u
-import pyarrow as A
+import pyarrow as pa
 
 from .pandas_examples import dataframe_with_arrays, dataframe_with_lists
 
@@ -67,7 +67,7 @@ class TestPandasConversion(unittest.TestCase):
     def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
                                 timestamps_to_ms=False, expected_schema=None,
                                 check_dtype=True, schema=None):
-        table = A.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
+        table = pa.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
                                     schema=schema)
         result = table.to_pandas(nthreads=nthreads)
         if expected_schema:
@@ -78,7 +78,7 @@ class TestPandasConversion(unittest.TestCase):
 
     def _check_array_roundtrip(self, values, expected=None, mask=None,
                                timestamps_to_ms=False, type=None):
-        arr = A.Array.from_numpy(values, timestamps_to_ms=timestamps_to_ms,
+        arr = pa.Array.from_numpy(values, timestamps_to_ms=timestamps_to_ms,
                                  mask=mask, type=type)
         result = arr.to_pandas()
 
@@ -99,23 +99,23 @@ class TestPandasConversion(unittest.TestCase):
     def test_float_no_nulls(self):
         data = {}
         fields = []
-        dtypes = [('f4', A.float32()), ('f8', A.float64())]
+        dtypes = [('f4', pa.float32()), ('f8', pa.float64())]
         num_values = 100
 
         for numpy_dtype, arrow_dtype in dtypes:
             values = np.random.randn(num_values)
             data[numpy_dtype] = values.astype(numpy_dtype)
-            fields.append(A.Field.from_py(numpy_dtype, arrow_dtype))
+            fields.append(pa.Field.from_py(numpy_dtype, arrow_dtype))
 
         df = pd.DataFrame(data)
-        schema = A.Schema.from_fields(fields)
+        schema = pa.Schema.from_fields(fields)
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_float_nulls(self):
         num_values = 100
 
         null_mask = np.random.randint(0, 10, size=num_values) < 3
-        dtypes = [('f4', A.float32()), ('f8', A.float64())]
+        dtypes = [('f4', pa.float32()), ('f8', pa.float64())]
         names = ['f4', 'f8']
         expected_cols = []
 
@@ -124,9 +124,9 @@ class TestPandasConversion(unittest.TestCase):
         for name, arrow_dtype in dtypes:
             values = np.random.randn(num_values).astype(name)
 
-            arr = A.Array.from_numpy(values, null_mask)
+            arr = pa.Array.from_numpy(values, null_mask)
             arrays.append(arr)
-            fields.append(A.Field.from_py(name, arrow_dtype))
+            fields.append(pa.Field.from_py(name, arrow_dtype))
             values[null_mask] = np.nan
 
             expected_cols.append(values)
@@ -134,8 +134,8 @@ class TestPandasConversion(unittest.TestCase):
         ex_frame = pd.DataFrame(dict(zip(names, expected_cols)),
                                 columns=names)
 
-        table = A.Table.from_arrays(arrays, names)
-        assert table.schema.equals(A.Schema.from_fields(fields))
+        table = pa.Table.from_arrays(arrays, names)
+        assert table.schema.equals(pa.Schema.from_fields(fields))
         result = table.to_pandas()
         tm.assert_frame_equal(result, ex_frame)
 
@@ -144,11 +144,11 @@ class TestPandasConversion(unittest.TestCase):
         fields = []
 
         numpy_dtypes = [
-            ('i1', A.int8()), ('i2', A.int16()),
-            ('i4', A.int32()), ('i8', A.int64()),
-            ('u1', A.uint8()), ('u2', A.uint16()),
-            ('u4', A.uint32()), ('u8', A.uint64()),
-            ('longlong', A.int64()), ('ulonglong', A.uint64())
+            ('i1', pa.int8()), ('i2', pa.int16()),
+            ('i4', pa.int32()), ('i8', pa.int64()),
+            ('u1', pa.uint8()), ('u2', pa.uint16()),
+            ('u4', pa.uint32()), ('u8', pa.uint64()),
+            ('longlong', pa.int64()), ('ulonglong', pa.uint64())
         ]
         num_values = 100
 
@@ -158,10 +158,10 @@ class TestPandasConversion(unittest.TestCase):
                                        min(info.max, np.iinfo('i8').max),
                                        size=num_values)
             data[dtype] = values.astype(dtype)
-            fields.append(A.Field.from_py(dtype, arrow_dtype))
+            fields.append(pa.Field.from_py(dtype, arrow_dtype))
 
         df = pd.DataFrame(data)
-        schema = A.Schema.from_fields(fields)
+        schema = pa.Schema.from_fields(fields)
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_integer_with_nulls(self):
@@ -177,7 +177,7 @@ class TestPandasConversion(unittest.TestCase):
         for name in int_dtypes:
             values = np.random.randint(0, 100, size=num_values)
 
-            arr = A.Array.from_numpy(values, null_mask)
+            arr = pa.Array.from_numpy(values, null_mask)
             arrays.append(arr)
 
             expected = values.astype('f8')
@@ -188,7 +188,7 @@ class TestPandasConversion(unittest.TestCase):
         ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
                                 columns=int_dtypes)
 
-        table = A.Table.from_arrays(arrays, int_dtypes)
+        table = pa.Table.from_arrays(arrays, int_dtypes)
         result = table.to_pandas()
 
         tm.assert_frame_equal(result, ex_frame)
@@ -199,8 +199,8 @@ class TestPandasConversion(unittest.TestCase):
         np.random.seed(0)
 
         df = pd.DataFrame({'bools': np.random.randn(num_values) > 0})
-        field = A.Field.from_py('bools', A.bool_())
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('bools', pa.bool_())
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_boolean_nulls(self):
@@ -211,16 +211,16 @@ class TestPandasConversion(unittest.TestCase):
         mask = np.random.randint(0, 10, size=num_values) < 3
         values = np.random.randint(0, 10, size=num_values) < 5
 
-        arr = A.Array.from_numpy(values, mask)
+        arr = pa.Array.from_numpy(values, mask)
 
         expected = values.astype(object)
         expected[mask] = None
 
-        field = A.Field.from_py('bools', A.bool_())
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('bools', pa.bool_())
+        schema = pa.Schema.from_fields([field])
         ex_frame = pd.DataFrame({'bools': expected})
 
-        table = A.Table.from_arrays([arr], ['bools'])
+        table = pa.Table.from_arrays([arr], ['bools'])
         assert table.schema.equals(schema)
         result = table.to_pandas()
 
@@ -229,16 +229,16 @@ class TestPandasConversion(unittest.TestCase):
     def test_boolean_object_nulls(self):
         arr = np.array([False, None, True] * 100, dtype=object)
         df = pd.DataFrame({'bools': arr})
-        field = A.Field.from_py('bools', A.bool_())
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('bools', pa.bool_())
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_unicode(self):
         repeats = 1000
         values = [u'foo', None, u'bar', u'mañana', np.nan]
         df = pd.DataFrame({'strings': values * repeats})
-        field = A.Field.from_py('strings', A.string())
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('strings', pa.string())
+        schema = pa.Schema.from_fields([field])
 
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
@@ -246,8 +246,8 @@ class TestPandasConversion(unittest.TestCase):
         values = [u('qux'), b'foo', None, 'bar', 'qux', np.nan]
         df = pd.DataFrame({'strings': values})
 
-        table = A.Table.from_pandas(df)
-        assert table[0].type == A.binary()
+        table = pa.Table.from_pandas(df)
+        assert table[0].type == pa.binary()
 
         values2 = [b'qux', b'foo', None, b'bar', b'qux', np.nan]
         expected = pd.DataFrame({'strings': values2})
@@ -256,8 +256,8 @@ class TestPandasConversion(unittest.TestCase):
     def test_fixed_size_bytes(self):
         values = [b'foo', None, b'bar', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
-        schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
-        table = A.Table.from_pandas(df, schema=schema)
+        schema = pa.Schema.from_fields([pa.field('strings', pa.binary(3))])
+        table = pa.Table.from_pandas(df, schema=schema)
         assert table.schema[0].type == schema[0].type
         assert table.schema[0].name == schema[0].name
         result = table.to_pandas()
@@ -266,9 +266,9 @@ class TestPandasConversion(unittest.TestCase):
     def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
         values = [b'foo', None, b'ba', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
-        schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
-        with self.assertRaises(A.ArrowInvalid):
-            A.Table.from_pandas(df, schema=schema)
+        schema = pa.Schema.from_fields([pa.field('strings', pa.binary(3))])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.Table.from_pandas(df, schema=schema)
 
     def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
@@ -278,8 +278,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437'],
                 dtype='datetime64[ms]')
             })
-        field = A.Field.from_py('datetime64', A.timestamp('ms'))
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('datetime64', pa.timestamp('ms'))
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=True,
                                      expected_schema=schema)
 
@@ -290,8 +290,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437699912'],
                 dtype='datetime64[ns]')
             })
-        field = A.Field.from_py('datetime64', A.timestamp('ns'))
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('datetime64', pa.timestamp('ns'))
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=False,
                                      expected_schema=schema)
 
@@ -303,8 +303,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437'],
                 dtype='datetime64[ms]')
             })
-        field = A.Field.from_py('datetime64', A.timestamp('ms'))
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('datetime64', pa.timestamp('ms'))
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=True,
                                      expected_schema=schema)
 
@@ -315,8 +315,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437699912'],
                 dtype='datetime64[ns]')
             })
-        field = A.Field.from_py('datetime64', A.timestamp('ns'))
-        schema = A.Schema.from_fields([field])
+        field = pa.Field.from_py('datetime64', pa.timestamp('ns'))
+        schema = pa.Schema.from_fields([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=False,
                                      expected_schema=schema)
 
@@ -345,25 +345,77 @@ class TestPandasConversion(unittest.TestCase):
                             .to_frame())
         self._check_pandas_roundtrip(df, timestamps_to_ms=False)
 
-    def test_date(self):
+    def test_date_infer(self):
         df = pd.DataFrame({
             'date': [datetime.date(2000, 1, 1),
                      None,
                      datetime.date(1970, 1, 1),
                      datetime.date(2040, 2, 26)]})
-        table = A.Table.from_pandas(df)
-        field = A.Field.from_py('date', A.date64())
-        schema = A.Schema.from_fields([field])
+        table = pa.Table.from_pandas(df)
+        field = pa.Field.from_py('date', pa.date32())
+        schema = pa.Schema.from_fields([field])
         assert table.schema.equals(schema)
         result = table.to_pandas()
         expected = df.copy()
         expected['date'] = pd.to_datetime(df['date'])
         tm.assert_frame_equal(result, expected)
 
+    def test_date_objects_typed(self):
+        arr = np.array([
+            datetime.date(2017, 4, 3),
+            None,
+            datetime.date(2017, 4, 4),
+            datetime.date(2017, 4, 5)], dtype=object)
+
+        arr_i4 = np.array([17259, -1, 17260, 17261], dtype='int32')
+        arr_i8 = arr_i4.astype('int64') * 86400000
+        mask = np.array([False, True, False, False])
+
+        t32 = pa.date32()
+        t64 = pa.date64()
+
+        a32 = pa.Array.from_numpy(arr, type=t32)
+        a64 = pa.Array.from_numpy(arr, type=t64)
+
+        a32_expected = pa.Array.from_numpy(arr_i4, mask=mask, type=t32)
+        a64_expected = pa.Array.from_numpy(arr_i8, mask=mask, type=t64)
+
+        assert a32.equals(a32_expected)
+        assert a64.equals(a64_expected)
+
+        # Test converting back to pandas
+        colnames = ['date32', 'date64']
+        table = pa.Table.from_arrays([a32, a64], colnames)
+        table_pandas = table.to_pandas()
+
+        ex_values = (np.array(['2017-04-03', '2017-04-04', '2017-04-04',
+                              '2017-04-05'],
+                              dtype='datetime64[D]')
+                     .astype('datetime64[ns]'))
+        ex_values[1] = pd.NaT.value
+        expected_pandas = pd.DataFrame({'date32': ex_values,
+                                        'date64': ex_values},
+                                       columns=colnames)
+        tm.assert_frame_equal(table_pandas, expected_pandas)
+
+    def test_dates_from_integers(self):
+        t1 = pa.date32()
+        t2 = pa.date64()
+
+        arr = np.array([17259, 17260, 17261], dtype='int32')
+        arr2 = arr.astype('int64') * 86400000
+
+        a1 = pa.Array.from_numpy(arr, type=t1)
+        a2 = pa.Array.from_numpy(arr2, type=t2)
+
+        expected = datetime.date(2017, 4, 3)
+        assert a1[0].as_py() == expected
+        assert a2[0].as_py() == expected
+
     def test_column_of_arrays(self):
         df, schema = dataframe_with_arrays()
         self._check_pandas_roundtrip(df, schema=schema, expected_schema=schema)
-        table = A.Table.from_pandas(df, schema=schema)
+        table = pa.Table.from_pandas(df, schema=schema)
         assert table.schema.equals(schema)
 
         for column in df.columns:
@@ -373,7 +425,7 @@ class TestPandasConversion(unittest.TestCase):
     def test_column_of_lists(self):
         df, schema = dataframe_with_lists()
         self._check_pandas_roundtrip(df, schema=schema, expected_schema=schema)
-        table = A.Table.from_pandas(df, schema=schema)
+        table = pa.Table.from_pandas(df, schema=schema)
         assert table.schema.equals(schema)
 
         for column in df.columns:
@@ -410,8 +462,8 @@ class TestPandasConversion(unittest.TestCase):
 
     def test_mixed_types_fails(self):
         data = pd.DataFrame({'a': ['a', 1, 2.0]})
-        with self.assertRaises(A.ArrowException):
-            A.Table.from_pandas(data)
+        with self.assertRaises(pa.ArrowException):
+            pa.Table.from_pandas(data)
 
     def test_strided_data_import(self):
         cases = []
@@ -460,9 +512,9 @@ class TestPandasConversion(unittest.TestCase):
                 decimal.Decimal('1234.439'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
-        field = A.Field.from_py('decimals', A.decimal(7, 3))
-        schema = A.Schema.from_fields([field])
+        converted = pa.Table.from_pandas(expected)
+        field = pa.Field.from_py('decimals', pa.decimal(7, 3))
+        schema = pa.Schema.from_fields([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_32_to_pandas(self):
@@ -472,7 +524,7 @@ class TestPandasConversion(unittest.TestCase):
                 decimal.Decimal('1234.439'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
+        converted = pa.Table.from_pandas(expected)
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)
 
@@ -483,9 +535,9 @@ class TestPandasConversion(unittest.TestCase):
                 decimal.Decimal('129534.123731'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
-        field = A.Field.from_py('decimals', A.decimal(12, 6))
-        schema = A.Schema.from_fields([field])
+        converted = pa.Table.from_pandas(expected)
+        field = pa.Field.from_py('decimals', pa.decimal(12, 6))
+        schema = pa.Schema.from_fields([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_64_to_pandas(self):
@@ -495,7 +547,7 @@ class TestPandasConversion(unittest.TestCase):
                 decimal.Decimal('129534.123731'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
+        converted = pa.Table.from_pandas(expected)
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)
 
@@ -506,9 +558,9 @@ class TestPandasConversion(unittest.TestCase):
                 -decimal.Decimal('314292388910493.12343437128'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
-        field = A.Field.from_py('decimals', A.decimal(26, 11))
-        schema = A.Schema.from_fields([field])
+        converted = pa.Table.from_pandas(expected)
+        field = pa.Field.from_py('decimals', pa.decimal(26, 11))
+        schema = pa.Schema.from_fields([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_128_to_pandas(self):
@@ -518,6 +570,6 @@ class TestPandasConversion(unittest.TestCase):
                 -decimal.Decimal('314292388910493.12343437128'),
             ]
         })
-        converted = A.Table.from_pandas(expected)
+        converted = pa.Table.from_pandas(expected)
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)


Mime
View raw message